sleepyhead111 commited on
Commit
9543496
·
verified ·
1 Parent(s): 55f12b9

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. mosesdecoder/contrib/goshen-chrome/README.md +93 -0
  2. mosesdecoder/contrib/goshen-chrome/icon.png +0 -0
  3. mosesdecoder/contrib/goshen-chrome/manifest.json +29 -0
  4. mosesdecoder/contrib/goshen-chrome/style.less +22 -0
  5. mosesdecoder/contrib/lmserver/COPYING +33 -0
  6. mosesdecoder/contrib/lmserver/Makefile.am +18 -0
  7. mosesdecoder/contrib/lmserver/NEWS +0 -0
  8. mosesdecoder/contrib/lmserver/config.h.in +97 -0
  9. mosesdecoder/contrib/lmserver/config.status +1207 -0
  10. mosesdecoder/contrib/lmserver/config.sub +1676 -0
  11. mosesdecoder/contrib/lmserver/configure.ac +235 -0
  12. mosesdecoder/contrib/lmserver/depcomp +589 -0
  13. mosesdecoder/contrib/lmserver/lmserver.h +375 -0
  14. mosesdecoder/contrib/lmserver/stamp-h1 +1 -0
  15. mosesdecoder/contrib/mert-moses-multi.pl +1529 -0
  16. mosesdecoder/contrib/relent-filter/src/IOWrapper.cpp +580 -0
  17. mosesdecoder/contrib/relent-filter/src/Jamfile +6 -0
  18. mosesdecoder/contrib/relent-filter/src/Main.h +39 -0
  19. mosesdecoder/contrib/relent-filter/src/TranslationAnalysis.cpp +126 -0
  20. mosesdecoder/contrib/relent-filter/src/mbr.h +28 -0
  21. mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-archetypeset.h +180 -0
  22. mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-beam.h +164 -0
  23. mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-const.h +125 -0
  24. mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-crf.h +359 -0
  25. mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-dtree-cont.h +479 -0
  26. mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-fixedmatrix.h +242 -0
  27. mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-gauss.h +287 -0
  28. mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-hash.h +105 -0
  29. mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-heap.h +181 -0
  30. mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-hmmloop.h +397 -0
  31. mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-iomacros.h +63 -0
  32. mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-list.h +481 -0
  33. mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-matrix.h +177 -0
  34. mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-modelfile.h +126 -0
  35. mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-prob.h +136 -0
  36. mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-racpt.h +332 -0
  37. mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-randvar.h +593 -0
  38. mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-refrv.h +74 -0
  39. mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-timer.h +52 -0
  40. mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-tree.h +43 -0
  41. mosesdecoder/contrib/zmert-moses.pl +1121 -0
  42. mosesdecoder/moses/TranslationModel/UG/TargetPhraseCollectionCache.h +47 -0
  43. mosesdecoder/moses/TranslationModel/UG/bitext-find.cc +151 -0
  44. mosesdecoder/moses/TranslationModel/UG/check-coverage.cc +82 -0
  45. mosesdecoder/moses/TranslationModel/UG/filter-pt.cc +669 -0
  46. mosesdecoder/moses/TranslationModel/UG/ptable-describe-features.cc +40 -0
  47. mosesdecoder/moses/TranslationModel/UG/sapt_pscore_cumulative_bias.h +39 -0
  48. mosesdecoder/moses/TranslationModel/UG/sapt_pscore_logcnt.h +62 -0
  49. mosesdecoder/moses/TranslationModel/UG/sapt_pscore_rareness.h +38 -0
  50. mosesdecoder/moses/TranslationModel/UG/sapt_pscore_wordcount.h +33 -0
mosesdecoder/contrib/goshen-chrome/README.md ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # goshen
2
+
3
+ Goshen is a Chrome extension that duplicates the utility of the Google Translate chrome extension for on-page website translation, using the Goshen JavaScript library with Moses as a backend translator. (It also has the ability to swap in an arbitrary translation engine, if the appropriate adapters are written.)
4
+
5
+
6
+ ## 1. The Goshen.js Library
7
+ As Google Translate is the current go-to machine-translation system for developers, I intend to make Moses a viable alternative for even the non-savvy developer. This is in large part simplified by having an easily deployed (perhaps Dockerized) Moses server, as mentioned in the section above. However, it is also greatly simplified by exposing a comprehensive and well-formed JavaScript API that allows the same level of flexibility as the existing Google API.
8
+
9
+ Instead of trying to duplicate the Google Translate API, I instead chose to write a wrapper for *any* translation engine. An engine with an exposed HTTP endpoint can be added to the Goshen translation library by implementing `GoshenAdapter`, for which I have provided a complete `moses-mt-server` implementation (`MosesGoshenAdapter`) and a partially complete proof of concept for Google Translate (`GoogleTranslateGoshenAdapter`). This is to illustrate that the engines can be used interchangeably for simple translation tasks, but the entirety of Moses functionality can be accessed whereas Google Translate's public API fails to accommodate some more technical tasks.
10
+
11
+ The library is both commented and minified, available in the `goshenlib/` directory, [here](https://github.com/j6k4m8/goshen-moses). It is also possible to import the unminified, importable version from `goshenlib/dist`. The complete documentation, as well as usage examples and implementation explanations and justifications, are available in `goshenlib/docs` at the above repository.
12
+
13
+ ## 2. Chrome Extension
14
+ This directory contains a Chrome extension that utilizes the CASMACAT moses-mt-server/Moses backend to provide a frontend website translation service. The extension automatically detects the relevant content of most articles or body-text on the page, and at the user's request, translates it to the requested language. Usage is explained below, as well as inside the extension popup after installation, for quick reference.
15
+
16
+ ### Usage
17
+ 1. **Install the unpacked extension.** Go to `chrome://extensions` and click <kbd>Load Unpacked Extension</kbd>. Navigate to this `goshen-chrome/` directory, and load.
18
+ 2. This adds a Goshen icon to your Chrome toolbar. Clicking it brings up a simple modal that allows the switching of languages.
19
+ 3. Use the <kbd>Alt</kbd>+<kbd>T</kbd> key-chord ("T" for "Translate") to begin text-selection. The Goshen-translate extension will highlight elements of text in cyan as you mouse over them: To translate what is currently highlighted, click.
20
+
21
+ ## Goshen.js Documentation
22
+
23
+ ### Overview
24
+ The Goshen library provides a web-developer-facing library for handling machine translation. It allows interaction with arbitrary machine translation services, agnostic of the technology or algorithm stack.
25
+
26
+ ### Usage
27
+ A very brief tutorial is provided here:
28
+
29
+ - Create a new Goshen object. Use the MosesGoshenAdapter, so that translations are handled by a Moses MT server.
30
+ ```JavaScript
31
+ g = new Goshen('localhost:3000', 'http', MosesGoshenAdapter);
32
+ ```
33
+ - Use the Goshen object to pass a translation job to the Moses adapter. The adapter will pass back a completed translation once the job completes.
34
+ ```JavaScript
35
+ g.translate('This is a simple sentence.', Languages.ENGLISH, Languages.SPANISH);
36
+ ```
37
+ - You can also optionally pass a callback function to the .translate method:
38
+ ```JavaScript
39
+ g.translate('This is a simple sentence.',
40
+ Languages.ENGLISH,
41
+ Languages.SPANISH,
42
+ function(err, val) {
43
+ if (!!err) {
44
+ console.warn("Encountered an error: " + err);
45
+ } else {
46
+ console.info("Translated to: " + val);
47
+ }
48
+ });
49
+ ```
50
+ If a callback is supplied, the function is run on a new thread, and is non-blocking. If one is not supplied, then the return value of the function contains the translated text. `undefined` is returned if the translation fails.
51
+
52
+
53
+ ### `Goshen`
54
+ The generic class for a Goshen.js object, the object that handles translation with an arbitrary translation backend. In order to specify a backend, pass a `type` parameter to the constructor. (Default is Moses, of course!)
55
+
56
+ - `Goshen`
57
+ - Arguments:
58
+ - `hostname`: A string hostname, such as `localhost:8000`. This is the base URL for formulating the RESTful API endpoint.
59
+ - `protocol`: The HTTP protocol. Either `http` or `https`.
60
+ - `type`: What type of GoshenAdapter to use. Options are currently `GoogleTranslateGoshenAdapter` or `MosesGoshenAdapter`.
61
+ - `opts`: A dictionary of options to pass to the adapter constructor. Currently, none are required for existing adapters.
62
+
63
+ - function `url`
64
+
65
+ Generate a complete URI. If `hostname` is `localhost:8000` and `protocol` is `https`, then `this.url('foo')` returns `https://localhost:8000/foo`
66
+ - Arguments:
67
+ - `suffix`: A suffix to concatenate onto the end of a well-formed URI.
68
+ - Returns:
69
+ - String: The complete web-accessible URL.
70
+
71
+ - function `translate`
72
+
73
+ Translate a text from a source language to a target language.
74
+ - Arguments:
75
+ - `text`: The text to translate. If this is too long, a series of truncated versions are translated, splitting on sentence-delimiters if possible.
76
+ - `source`: An item from the `LANGUAGES` set (e.g. `'en-us'`)
77
+ - `target`: An item from the `LANGUAGES` set (e.g. `'en-us'`)
78
+ - `callback`: Optional. If supplied, must be a function (or be of a callable type) that will be run with `errors` and `value` as its two arguments.
79
+ - Returns:
80
+ - String: The translated text. All supplementary data, such as alignments or language detections, are ignored by this function.
81
+
82
+
83
+ ### `GoshenAdapter`
84
+ The `Goshen` class secretly outsources all of its computation to a GoshenAdapter class attribute, which is responsible for performing the machine translation. `GoshenAdapter`s should expose `url` and `translate` functions unambiguously, with the same signatures as those in the `Goshen` class. Other functions may be optionally exposed.
85
+
86
+ #### `MosesGoshenAdapter`
87
+ This is one particular implementation of the `GoshenAdapter` type, that uses the `moses-mt-server` backend as its translation engine API endpoint. It splits text into manageable chunks when translating, to avoid crashing the underlying Moses server (RAM allocation fail).
88
+
89
+ #### `GoogleTranslateGoshenAdapter`
90
+ This is another implementation of the `GoshenAdapter` type, that uses the Google Translate API as its translation engine endpoint. Because Google handles arbitrarily long text, this adapter does not split text, as `MosesGoshenAdapter`s do.
91
+
92
+
93
+ For more information, see [this full report](https://github.com/j6k4m8/goshen-moses/blob/master/report/report.md), or contact Jordan Matelsky (@j6k4m8).
mosesdecoder/contrib/goshen-chrome/icon.png ADDED
mosesdecoder/contrib/goshen-chrome/manifest.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "manifest_version": 2,
3
+
4
+ "name": "Goshen Web Translator",
5
+ "description": "Translate entire webpages with a casmacat-moses backend",
6
+ "version": "1.0",
7
+
8
+ "browser_action": {
9
+ "default_icon": "icon.png",
10
+ "default_popup": "popup/popup.html"
11
+ },
12
+ "permissions": [
13
+ "activeTab",
14
+ "storage",
15
+ "https://ajax.googleapis.com/"
16
+ ],
17
+ "options_page" : "options/index.html",
18
+
19
+ "content_scripts": [{
20
+ "matches": ["http://*/*", "https://*/*", "file:///*"],
21
+ "css": ["onpage/onpage.css"],
22
+ "js": [
23
+ "onpage/onpage.js",
24
+ "onpage/goshen.js",
25
+ "onpage/chromegoshen.js"
26
+ ],
27
+ "all_frames": true
28
+ }]
29
+ }
mosesdecoder/contrib/goshen-chrome/style.less ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ * {
2
+ box-sizing: border-box;
3
+ }
4
+ .container {
5
+ width: 100%;
6
+ .dropdown-container, .arrow-container {
7
+ display: inline-block;
8
+ width: 40%;
9
+ }
10
+
11
+ button {
12
+ border-radius: 0;
13
+ background: #09f;
14
+ color: white;
15
+ text-transform: uppercase;
16
+ padding: 1em;
17
+ border: none;
18
+ cursor: pointer;
19
+ letter-spacing: 0.1em;
20
+ font-size: 1.1em;
21
+ }
22
+ }
mosesdecoder/contrib/lmserver/COPYING ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Copyright (c) 2009, Chris Dyer
2
+
3
+ Portions of this software are
4
+ Copyright (c) 2003, Danga Interactive, Inc.
5
+ All rights reserved.
6
+
7
+ Redistribution and use in source and binary forms, with or without
8
+ modification, are permitted provided that the following conditions are
9
+ met:
10
+
11
+ * Redistributions of source code must retain the above copyright
12
+ notice, this list of conditions and the following disclaimer.
13
+
14
+ * Redistributions in binary form must reproduce the above
15
+ copyright notice, this list of conditions and the following disclaimer
16
+ in the documentation and/or other materials provided with the
17
+ distribution.
18
+
19
+ * Neither the name of the Danga Interactive nor the names of its
20
+ contributors may be used to endorse or promote products derived from
21
+ this software without specific prior written permission.
22
+
23
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
mosesdecoder/contrib/lmserver/Makefile.am ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ bin_PROGRAMS = lmserver lmserver-debug
2
+
3
+ lmserver_SOURCES = lmserver.c lmserver.h thread.c srilm.cc
4
+ lmserver_debug_SOURCES = $(lmserver_SOURCES)
5
+ lmserver_CPPFLAGS = -DNDEBUG
6
+ lmserver_LDADD = @DAEMON_OBJ@
7
+ lmserver_debug_LDADD = @DAEMON_OBJ@
8
+ lmserver_DEPENDENCIES = @DAEMON_OBJ@
9
+ lmserver_debug_DEPENDENCIES = @DAEMON_OBJ@
10
+
11
+ DIST_DIRS = examples
12
+ EXTRA_DIST = examples daemon.c
13
+
14
+ dist-hook:
15
+ rm -rf $(distdir)/doc/.svn/
16
+ rm -rf $(distdir)/scripts/.svn/
17
+ rm -rf $(distdir)/t/.svn/
18
+ rm -rf $(distdir)/t/lib/.svn/
mosesdecoder/contrib/lmserver/NEWS ADDED
File without changes
mosesdecoder/contrib/lmserver/config.h.in ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* config.h.in. Generated from configure.ac by autoheader. */
2
+
3
+ /* machine is bigendian */
4
+ #undef ENDIAN_BIG
5
+
6
+ /* machine is littleendian */
7
+ #undef ENDIAN_LITTLE
8
+
9
+ /* Define this if you have daemon() */
10
+ #undef HAVE_DAEMON
11
+
12
+ /* Define to 1 if you have the `getpagesizes' function. */
13
+ #undef HAVE_GETPAGESIZES
14
+
15
+ /* Define to 1 if you have the <inttypes.h> header file. */
16
+ #undef HAVE_INTTYPES_H
17
+
18
+ /* do we have malloc.h? */
19
+ #undef HAVE_MALLOC_H
20
+
21
+ /* Define to 1 if you have the `memcntl' function. */
22
+ #undef HAVE_MEMCNTL
23
+
24
+ /* Define to 1 if you have the <memory.h> header file. */
25
+ #undef HAVE_MEMORY_H
26
+
27
+ /* Define to 1 if you have the `mlockall' function. */
28
+ #undef HAVE_MLOCKALL
29
+
30
+ /* flag for SRILM */
31
+ #undef HAVE_SRILM
32
+
33
+ /* Define to 1 if stdbool.h conforms to C99. */
34
+ #undef HAVE_STDBOOL_H
35
+
36
+ /* Define to 1 if you have the <stdint.h> header file. */
37
+ #undef HAVE_STDINT_H
38
+
39
+ /* Define to 1 if you have the <stdlib.h> header file. */
40
+ #undef HAVE_STDLIB_H
41
+
42
+ /* Define to 1 if you have the <strings.h> header file. */
43
+ #undef HAVE_STRINGS_H
44
+
45
+ /* Define to 1 if you have the <string.h> header file. */
46
+ #undef HAVE_STRING_H
47
+
48
+ /* do we have struct mallinfo? */
49
+ #undef HAVE_STRUCT_MALLINFO
50
+
51
+ /* Define to 1 if you have the <sys/stat.h> header file. */
52
+ #undef HAVE_SYS_STAT_H
53
+
54
+ /* Define to 1 if you have the <sys/types.h> header file. */
55
+ #undef HAVE_SYS_TYPES_H
56
+
57
+ /* Define to 1 if you have the <unistd.h> header file. */
58
+ #undef HAVE_UNISTD_H
59
+
60
+ /* Define to 1 if the system has the type `_Bool'. */
61
+ #undef HAVE__BOOL
62
+
63
+ /* Define to 1 if your C compiler doesn't accept -c and -o together. */
64
+ #undef NO_MINUS_C_MINUS_O
65
+
66
+ /* Name of package */
67
+ #undef PACKAGE
68
+
69
+ /* Define to the address where bug reports for this package should be sent. */
70
+ #undef PACKAGE_BUGREPORT
71
+
72
+ /* Define to the full name of this package. */
73
+ #undef PACKAGE_NAME
74
+
75
+ /* Define to the full name and version of this package. */
76
+ #undef PACKAGE_STRING
77
+
78
+ /* Define to the one symbol short name of this package. */
79
+ #undef PACKAGE_TARNAME
80
+
81
+ /* Define to the version of this package. */
82
+ #undef PACKAGE_VERSION
83
+
84
+ /* Define to 1 if you have the ANSI C header files. */
85
+ #undef STDC_HEADERS
86
+
87
+ /* Define this if you want to use pthreads */
88
+ #undef USE_THREADS
89
+
90
+ /* Version number of package */
91
+ #undef VERSION
92
+
93
+ /* Define to empty if `const' does not conform to ANSI C. */
94
+ #undef const
95
+
96
+ /* define to int if socklen_t not available */
97
+ #undef socklen_t
mosesdecoder/contrib/lmserver/config.status ADDED
@@ -0,0 +1,1207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #! /bin/sh
2
+ # Generated by configure.
3
+ # Run this file to recreate the current configuration.
4
+ # Compiler output produced by configure, useful for debugging
5
+ # configure, is in config.log if it exists.
6
+
7
+ debug=false
8
+ ac_cs_recheck=false
9
+ ac_cs_silent=false
10
+ SHELL=${CONFIG_SHELL-/bin/sh}
11
+ ## --------------------- ##
12
+ ## M4sh Initialization. ##
13
+ ## --------------------- ##
14
+
15
+ # Be Bourne compatible
16
+ if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
17
+ emulate sh
18
+ NULLCMD=:
19
+ # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which
20
+ # is contrary to our usage. Disable this feature.
21
+ alias -g '${1+"$@"}'='"$@"'
22
+ elif test -n "${BASH_VERSION+set}" && (set -o posix) >/dev/null 2>&1; then
23
+ set -o posix
24
+ fi
25
+ DUALCASE=1; export DUALCASE # for MKS sh
26
+
27
+ # Support unset when possible.
28
+ if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then
29
+ as_unset=unset
30
+ else
31
+ as_unset=false
32
+ fi
33
+
34
+
35
+ # Work around bugs in pre-3.0 UWIN ksh.
36
+ $as_unset ENV MAIL MAILPATH
37
+ PS1='$ '
38
+ PS2='> '
39
+ PS4='+ '
40
+
41
+ # NLS nuisances.
42
+ for as_var in \
43
+ LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \
44
+ LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \
45
+ LC_TELEPHONE LC_TIME
46
+ do
47
+ if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then
48
+ eval $as_var=C; export $as_var
49
+ else
50
+ $as_unset $as_var
51
+ fi
52
+ done
53
+
54
+ # Required to use basename.
55
+ if expr a : '\(a\)' >/dev/null 2>&1; then
56
+ as_expr=expr
57
+ else
58
+ as_expr=false
59
+ fi
60
+
61
+ if (basename /) >/dev/null 2>&1 && test "X`basename / 2>&1`" = "X/"; then
62
+ as_basename=basename
63
+ else
64
+ as_basename=false
65
+ fi
66
+
67
+
68
+ # Name of the executable.
69
+ as_me=`$as_basename "$0" ||
70
+ $as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
71
+ X"$0" : 'X\(//\)$' \| \
72
+ X"$0" : 'X\(/\)$' \| \
73
+ . : '\(.\)' 2>/dev/null ||
74
+ echo X/"$0" |
75
+ sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/; q; }
76
+ /^X\/\(\/\/\)$/{ s//\1/; q; }
77
+ /^X\/\(\/\).*/{ s//\1/; q; }
78
+ s/.*/./; q'`
79
+
80
+
81
+ # PATH needs CR, and LINENO needs CR and PATH.
82
+ # Avoid depending upon Character Ranges.
83
+ as_cr_letters='abcdefghijklmnopqrstuvwxyz'
84
+ as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
85
+ as_cr_Letters=$as_cr_letters$as_cr_LETTERS
86
+ as_cr_digits='0123456789'
87
+ as_cr_alnum=$as_cr_Letters$as_cr_digits
88
+
89
+ # The user is always right.
90
+ if test "${PATH_SEPARATOR+set}" != set; then
91
+ echo "#! /bin/sh" >conf$$.sh
92
+ echo "exit 0" >>conf$$.sh
93
+ chmod +x conf$$.sh
94
+ if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then
95
+ PATH_SEPARATOR=';'
96
+ else
97
+ PATH_SEPARATOR=:
98
+ fi
99
+ rm -f conf$$.sh
100
+ fi
101
+
102
+
103
+ as_lineno_1=$LINENO
104
+ as_lineno_2=$LINENO
105
+ as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null`
106
+ test "x$as_lineno_1" != "x$as_lineno_2" &&
107
+ test "x$as_lineno_3" = "x$as_lineno_2" || {
108
+ # Find who we are. Look in the path if we contain no path at all
109
+ # relative or not.
110
+ case $0 in
111
+ *[\\/]* ) as_myself=$0 ;;
112
+ *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
113
+ for as_dir in $PATH
114
+ do
115
+ IFS=$as_save_IFS
116
+ test -z "$as_dir" && as_dir=.
117
+ test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
118
+ done
119
+
120
+ ;;
121
+ esac
122
+ # We did not find ourselves, most probably we were run as `sh COMMAND'
123
+ # in which case we are not to be found in the path.
124
+ if test "x$as_myself" = x; then
125
+ as_myself=$0
126
+ fi
127
+ if test ! -f "$as_myself"; then
128
+ { { echo "$as_me:$LINENO: error: cannot find myself; rerun with an absolute path" >&5
129
+ echo "$as_me: error: cannot find myself; rerun with an absolute path" >&2;}
130
+ { (exit 1); exit 1; }; }
131
+ fi
132
+ case $CONFIG_SHELL in
133
+ '')
134
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
135
+ for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH
136
+ do
137
+ IFS=$as_save_IFS
138
+ test -z "$as_dir" && as_dir=.
139
+ for as_base in sh bash ksh sh5; do
140
+ case $as_dir in
141
+ /*)
142
+ if ("$as_dir/$as_base" -c '
143
+ as_lineno_1=$LINENO
144
+ as_lineno_2=$LINENO
145
+ as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null`
146
+ test "x$as_lineno_1" != "x$as_lineno_2" &&
147
+ test "x$as_lineno_3" = "x$as_lineno_2" ') 2>/dev/null; then
148
+ $as_unset BASH_ENV || test "${BASH_ENV+set}" != set || { BASH_ENV=; export BASH_ENV; }
149
+ $as_unset ENV || test "${ENV+set}" != set || { ENV=; export ENV; }
150
+ CONFIG_SHELL=$as_dir/$as_base
151
+ export CONFIG_SHELL
152
+ exec "$CONFIG_SHELL" "$0" ${1+"$@"}
153
+ fi;;
154
+ esac
155
+ done
156
+ done
157
+ ;;
158
+ esac
159
+
160
+ # Create $as_me.lineno as a copy of $as_myself, but with $LINENO
161
+ # uniformly replaced by the line number. The first 'sed' inserts a
162
+ # line-number line before each line; the second 'sed' does the real
163
+ # work. The second script uses 'N' to pair each line-number line
164
+ # with the numbered line, and appends trailing '-' during
165
+ # substitution so that $LINENO is not a special case at line end.
166
+ # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the
167
+ # second 'sed' script. Blame Lee E. McMahon for sed's syntax. :-)
168
+ sed '=' <$as_myself |
169
+ sed '
170
+ N
171
+ s,$,-,
172
+ : loop
173
+ s,^\(['$as_cr_digits']*\)\(.*\)[$]LINENO\([^'$as_cr_alnum'_]\),\1\2\1\3,
174
+ t loop
175
+ s,-$,,
176
+ s,^['$as_cr_digits']*\n,,
177
+ ' >$as_me.lineno &&
178
+ chmod +x $as_me.lineno ||
179
+ { { echo "$as_me:$LINENO: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&5
180
+ echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2;}
181
+ { (exit 1); exit 1; }; }
182
+
183
+ # Don't try to exec as it changes $[0], causing all sort of problems
184
+ # (the dirname of $[0] is not the place where we might find the
185
+ # original and so on. Autoconf is especially sensible to this).
186
+ . ./$as_me.lineno
187
+ # Exit status is that of the last command.
188
+ exit
189
+ }
190
+
191
+
192
+ case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in
193
+ *c*,-n*) ECHO_N= ECHO_C='
194
+ ' ECHO_T=' ' ;;
195
+ *c*,* ) ECHO_N=-n ECHO_C= ECHO_T= ;;
196
+ *) ECHO_N= ECHO_C='\c' ECHO_T= ;;
197
+ esac
198
+
199
+ if expr a : '\(a\)' >/dev/null 2>&1; then
200
+ as_expr=expr
201
+ else
202
+ as_expr=false
203
+ fi
204
+
205
+ rm -f conf$$ conf$$.exe conf$$.file
206
+ echo >conf$$.file
207
+ if ln -s conf$$.file conf$$ 2>/dev/null; then
208
+ # We could just check for DJGPP; but this test a) works b) is more generic
209
+ # and c) will remain valid once DJGPP supports symlinks (DJGPP 2.04).
210
+ if test -f conf$$.exe; then
211
+ # Don't use ln at all; we don't have any links
212
+ as_ln_s='cp -p'
213
+ else
214
+ as_ln_s='ln -s'
215
+ fi
216
+ elif ln conf$$.file conf$$ 2>/dev/null; then
217
+ as_ln_s=ln
218
+ else
219
+ as_ln_s='cp -p'
220
+ fi
221
+ rm -f conf$$ conf$$.exe conf$$.file
222
+
223
+ if mkdir -p . 2>/dev/null; then
224
+ as_mkdir_p=:
225
+ else
226
+ test -d ./-p && rmdir ./-p
227
+ as_mkdir_p=false
228
+ fi
229
+
230
+ as_executable_p="test -f"
231
+
232
+ # Sed expression to map a string onto a valid CPP name.
233
+ as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
234
+
235
+ # Sed expression to map a string onto a valid variable name.
236
+ as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
237
+
238
+
239
+ # IFS
240
+ # We need space, tab and new line, in precisely that order.
241
+ as_nl='
242
+ '
243
+ IFS=" $as_nl"
244
+
245
+ # CDPATH.
246
+ $as_unset CDPATH
247
+
248
+ exec 6>&1
249
+
250
+ # Open the log real soon, to keep \$[0] and so on meaningful, and to
251
+ # report actual input values of CONFIG_FILES etc. instead of their
252
+ # values after options handling. Logging --version etc. is OK.
253
+ exec 5>>config.log
254
+ {
255
+ echo
256
+ sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX
257
+ ## Running $as_me. ##
258
+ _ASBOX
259
+ } >&5
260
+ cat >&5 <<_CSEOF
261
+
262
+ This file was extended by lmserver $as_me 1.0, which was
263
+ generated by GNU Autoconf 2.59. Invocation command line was
264
+
265
+ CONFIG_FILES = $CONFIG_FILES
266
+ CONFIG_HEADERS = $CONFIG_HEADERS
267
+ CONFIG_LINKS = $CONFIG_LINKS
268
+ CONFIG_COMMANDS = $CONFIG_COMMANDS
269
+ $ $0 $@
270
+
271
+ _CSEOF
272
+ echo "on `(hostname || uname -n) 2>/dev/null | sed 1q`" >&5
273
+ echo >&5
274
+ config_files=" Makefile"
275
+ config_headers=" config.h"
276
+ config_commands=" depfiles"
277
+
278
+ ac_cs_usage="\
279
+ \`$as_me' instantiates files from templates according to the
280
+ current configuration.
281
+
282
+ Usage: $0 [OPTIONS] [FILE]...
283
+
284
+ -h, --help print this help, then exit
285
+ -V, --version print version number, then exit
286
+ -q, --quiet do not print progress messages
287
+ -d, --debug don't remove temporary files
288
+ --recheck update $as_me by reconfiguring in the same conditions
289
+ --file=FILE[:TEMPLATE]
290
+ instantiate the configuration file FILE
291
+ --header=FILE[:TEMPLATE]
292
+ instantiate the configuration header FILE
293
+
294
+ Configuration files:
295
+ $config_files
296
+
297
+ Configuration headers:
298
+ $config_headers
299
+
300
+ Configuration commands:
301
+ $config_commands
302
+
303
+ Report bugs to <bug-autoconf@gnu.org>."
304
+ ac_cs_version="\
305
+ lmserver config.status 1.0
306
+ configured by ./configure, generated by GNU Autoconf 2.59,
307
+ with options \"'--with-libevent=/fs/clip-software/libevent-1.4.8-stable' '--with-srilm=/fs/clip-software/srilm-1.5.6-PIC'\"
308
+
309
+ Copyright (C) 2003 Free Software Foundation, Inc.
310
+ This config.status script is free software; the Free Software Foundation
311
+ gives unlimited permission to copy, distribute and modify it."
312
+ srcdir=.
313
+ INSTALL="/usr/bin/install -c"
314
+ # If no file are specified by the user, then we need to provide default
315
+ # value. By we need to know if files were specified by the user.
316
+ ac_need_defaults=:
317
+ while test $# != 0
318
+ do
319
+ case $1 in
320
+ --*=*)
321
+ ac_option=`expr "x$1" : 'x\([^=]*\)='`
322
+ ac_optarg=`expr "x$1" : 'x[^=]*=\(.*\)'`
323
+ ac_shift=:
324
+ ;;
325
+ -*)
326
+ ac_option=$1
327
+ ac_optarg=$2
328
+ ac_shift=shift
329
+ ;;
330
+ *) # This is not an option, so the user has probably given explicit
331
+ # arguments.
332
+ ac_option=$1
333
+ ac_need_defaults=false;;
334
+ esac
335
+
336
+ case $ac_option in
337
+ # Handling of the options.
338
+ -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r)
339
+ ac_cs_recheck=: ;;
340
+ --version | --vers* | -V )
341
+ echo "$ac_cs_version"; exit 0 ;;
342
+ --he | --h)
343
+ # Conflict between --help and --header
344
+ { { echo "$as_me:$LINENO: error: ambiguous option: $1
345
+ Try \`$0 --help' for more information." >&5
346
+ echo "$as_me: error: ambiguous option: $1
347
+ Try \`$0 --help' for more information." >&2;}
348
+ { (exit 1); exit 1; }; };;
349
+ --help | --hel | -h )
350
+ echo "$ac_cs_usage"; exit 0 ;;
351
+ --debug | --d* | -d )
352
+ debug=: ;;
353
+ --file | --fil | --fi | --f )
354
+ $ac_shift
355
+ CONFIG_FILES="$CONFIG_FILES $ac_optarg"
356
+ ac_need_defaults=false;;
357
+ --header | --heade | --head | --hea )
358
+ $ac_shift
359
+ CONFIG_HEADERS="$CONFIG_HEADERS $ac_optarg"
360
+ ac_need_defaults=false;;
361
+ -q | -quiet | --quiet | --quie | --qui | --qu | --q \
362
+ | -silent | --silent | --silen | --sile | --sil | --si | --s)
363
+ ac_cs_silent=: ;;
364
+
365
+ # This is an error.
366
+ -*) { { echo "$as_me:$LINENO: error: unrecognized option: $1
367
+ Try \`$0 --help' for more information." >&5
368
+ echo "$as_me: error: unrecognized option: $1
369
+ Try \`$0 --help' for more information." >&2;}
370
+ { (exit 1); exit 1; }; } ;;
371
+
372
+ *) ac_config_targets="$ac_config_targets $1" ;;
373
+
374
+ esac
375
+ shift
376
+ done
377
+
378
+ ac_configure_extra_args=
379
+
380
+ if $ac_cs_silent; then
381
+ exec 6>/dev/null
382
+ ac_configure_extra_args="$ac_configure_extra_args --silent"
383
+ fi
384
+
385
+ if $ac_cs_recheck; then
386
+ echo "running /bin/sh ./configure " '--with-libevent=/fs/clip-software/libevent-1.4.8-stable' '--with-srilm=/fs/clip-software/srilm-1.5.6-PIC' $ac_configure_extra_args " --no-create --no-recursion" >&6
387
+ exec /bin/sh ./configure '--with-libevent=/fs/clip-software/libevent-1.4.8-stable' '--with-srilm=/fs/clip-software/srilm-1.5.6-PIC' $ac_configure_extra_args --no-create --no-recursion
388
+ fi
389
+
390
+ #
391
+ # INIT-COMMANDS section.
392
+ #
393
+
394
+ AMDEP_TRUE="" ac_aux_dir="."
395
+
396
+ for ac_config_target in $ac_config_targets
397
+ do
398
+ case "$ac_config_target" in
399
+ # Handling of arguments.
400
+ "Makefile" ) CONFIG_FILES="$CONFIG_FILES Makefile" ;;
401
+ "depfiles" ) CONFIG_COMMANDS="$CONFIG_COMMANDS depfiles" ;;
402
+ "config.h" ) CONFIG_HEADERS="$CONFIG_HEADERS config.h" ;;
403
+ *) { { echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5
404
+ echo "$as_me: error: invalid argument: $ac_config_target" >&2;}
405
+ { (exit 1); exit 1; }; };;
406
+ esac
407
+ done
408
+
409
+ # If the user did not use the arguments to specify the items to instantiate,
410
+ # then the envvar interface is used. Set only those that are not.
411
+ # We use the long form for the default assignment because of an extremely
412
+ # bizarre bug on SunOS 4.1.3.
413
+ if $ac_need_defaults; then
414
+ test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files
415
+ test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers
416
+ test "${CONFIG_COMMANDS+set}" = set || CONFIG_COMMANDS=$config_commands
417
+ fi
418
+
419
+ # Have a temporary directory for convenience. Make it in the build tree
420
+ # simply because there is no reason against having it here, and in addition,
421
+ # creating and moving files from /tmp can sometimes cause problems.
422
+ # Create a temporary directory, and hook for its removal unless debugging.
423
+ $debug ||
424
+ {
425
+ trap 'exit_status=$?; rm -rf $tmp && exit $exit_status' 0
426
+ trap '{ (exit 1); exit 1; }' 1 2 13 15
427
+ }
428
+
429
+ # Create a (secure) tmp directory for tmp files: mktemp first, mkdir as fallback.
429
+
430
+ {
431
+ tmp=`(umask 077 && mktemp -d -q "./confstatXXXXXX") 2>/dev/null` &&
432
+ test -n "$tmp" && test -d "$tmp"
433
+ } ||
434
+ {
435
+ tmp=./confstat$$-$RANDOM
436
+ (umask 077 && mkdir $tmp)
437
+ } ||
438
+ {
439
+ echo "$as_me: cannot create a temporary directory in ." >&2 # as_me, like every other diagnostic here
440
+ { (exit 1); exit 1; }
441
+ }
443
+
444
+
445
+ #
446
+ # CONFIG_FILES section.
447
+ #
448
+
449
+ # No need to generate the scripts if there are no CONFIG_FILES.
450
+ # This happens for instance when ./config.status config.h
451
+ if test -n "$CONFIG_FILES"; then
452
+ # Protect against being on the right side of a sed subst in config.status.
453
+ sed 's/,@/@@/; s/@,/@@/; s/,;t t$/@;t t/; /@;t t$/s/[\\&,]/\\&/g;
454
+ s/@@/,@/; s/@@/@,/; s/@;t t$/,;t t/' >$tmp/subs.sed <<\CEOF
455
+ s,@SHELL@,/bin/sh,;t t
456
+ s,@PATH_SEPARATOR@,:,;t t
457
+ s,@PACKAGE_NAME@,lmserver,;t t
458
+ s,@PACKAGE_TARNAME@,lmserver,;t t
459
+ s,@PACKAGE_VERSION@,1.0,;t t
460
+ s,@PACKAGE_STRING@,lmserver 1.0,;t t
461
+ s,@PACKAGE_BUGREPORT@,,;t t
462
+ s,@exec_prefix@,${prefix},;t t
463
+ s,@prefix@,/usr/local,;t t
464
+ s,@program_transform_name@,s,x,x,,;t t
465
+ s,@bindir@,${exec_prefix}/bin,;t t
466
+ s,@sbindir@,${exec_prefix}/sbin,;t t
467
+ s,@libexecdir@,${exec_prefix}/libexec,;t t
468
+ s,@datadir@,${prefix}/share,;t t
469
+ s,@sysconfdir@,${prefix}/etc,;t t
470
+ s,@sharedstatedir@,${prefix}/com,;t t
471
+ s,@localstatedir@,${prefix}/var,;t t
472
+ s,@libdir@,${exec_prefix}/lib,;t t
473
+ s,@includedir@,${prefix}/include,;t t
474
+ s,@oldincludedir@,/usr/include,;t t
475
+ s,@infodir@,${prefix}/info,;t t
476
+ s,@mandir@,${prefix}/man,;t t
477
+ s,@build_alias@,,;t t
478
+ s,@host_alias@,,;t t
479
+ s,@target_alias@,,;t t
480
+ s,@DEFS@,-DHAVE_CONFIG_H,;t t
481
+ s,@ECHO_C@,,;t t
482
+ s,@ECHO_N@,-n,;t t
483
+ s,@ECHO_T@,,;t t
484
+ s,@LIBS@, -loolm -ldstruct -lmisc -levent,;t t
485
+ s,@build@,x86_64-unknown-linux-gnu,;t t
486
+ s,@build_cpu@,x86_64,;t t
487
+ s,@build_vendor@,unknown,;t t
488
+ s,@build_os@,linux-gnu,;t t
489
+ s,@host@,x86_64-unknown-linux-gnu,;t t
490
+ s,@host_cpu@,x86_64,;t t
491
+ s,@host_vendor@,unknown,;t t
492
+ s,@host_os@,linux-gnu,;t t
493
+ s,@target@,x86_64-unknown-linux-gnu,;t t
494
+ s,@target_cpu@,x86_64,;t t
495
+ s,@target_vendor@,unknown,;t t
496
+ s,@target_os@,linux-gnu,;t t
497
+ s,@INSTALL_PROGRAM@,${INSTALL},;t t
498
+ s,@INSTALL_SCRIPT@,${INSTALL},;t t
499
+ s,@INSTALL_DATA@,${INSTALL} -m 644,;t t
500
+ s,@CYGPATH_W@,echo,;t t
501
+ s,@PACKAGE@,lmserver,;t t
502
+ s,@VERSION@,1.0,;t t
503
+ s,@ACLOCAL@,${SHELL} /chomes/redpony/moses/moses-arabic/lmserver/missing --run aclocal-1.9,;t t
504
+ s,@AUTOCONF@,${SHELL} /chomes/redpony/moses/moses-arabic/lmserver/missing --run autoconf,;t t
505
+ s,@AUTOMAKE@,${SHELL} /chomes/redpony/moses/moses-arabic/lmserver/missing --run automake-1.9,;t t
506
+ s,@AUTOHEADER@,${SHELL} /chomes/redpony/moses/moses-arabic/lmserver/missing --run autoheader,;t t
507
+ s,@MAKEINFO@,${SHELL} /chomes/redpony/moses/moses-arabic/lmserver/missing --run makeinfo,;t t
508
+ s,@install_sh@,/chomes/redpony/moses/moses-arabic/lmserver/install-sh,;t t
509
+ s,@STRIP@,,;t t
510
+ s,@ac_ct_STRIP@,,;t t
511
+ s,@INSTALL_STRIP_PROGRAM@,${SHELL} $(install_sh) -c -s,;t t
512
+ s,@mkdir_p@,mkdir -p --,;t t
513
+ s,@AWK@,gawk,;t t
514
+ s,@SET_MAKE@,,;t t
515
+ s,@am__leading_dot@,.,;t t
516
+ s,@AMTAR@,${SHELL} /chomes/redpony/moses/moses-arabic/lmserver/missing --run tar,;t t
517
+ s,@am__tar@,${AMTAR} chof - "$$tardir",;t t
518
+ s,@am__untar@,${AMTAR} xf -,;t t
519
+ s,@CC@,gcc,;t t
520
+ s,@CFLAGS@,-g -O2,;t t
521
+ s,@LDFLAGS@,-L/fs/clip-software/libevent-1.4.8-stable/lib -L/fs/clip-software/srilm-1.5.6-PIC/lib/i686,;t t
522
+ s,@CPPFLAGS@,-I/fs/clip-software/libevent-1.4.8-stable/include -I/fs/clip-software/srilm-1.5.6-PIC/include,;t t
523
+ s,@ac_ct_CC@,gcc,;t t
524
+ s,@EXEEXT@,,;t t
525
+ s,@OBJEXT@,o,;t t
526
+ s,@DEPDIR@,.deps,;t t
527
+ s,@am__include@,include,;t t
528
+ s,@am__quote@,,;t t
529
+ s,@AMDEP_TRUE@,,;t t
530
+ s,@AMDEP_FALSE@,#,;t t
531
+ s,@AMDEPBACKSLASH@,\,;t t
532
+ s,@CCDEPMODE@,depmode=gcc3,;t t
533
+ s,@am__fastdepCC_TRUE@,,;t t
534
+ s,@am__fastdepCC_FALSE@,#,;t t
535
+ s,@CXX@,g++,;t t
536
+ s,@CXXFLAGS@,-g -O2,;t t
537
+ s,@ac_ct_CXX@,g++,;t t
538
+ s,@CXXDEPMODE@,depmode=gcc3,;t t
539
+ s,@am__fastdepCXX_TRUE@,,;t t
540
+ s,@am__fastdepCXX_FALSE@,#,;t t
541
+ s,@CPP@,gcc -E,;t t
542
+ s,@EGREP@,grep -E,;t t
543
+ s,@SRI_LM_TRUE@,,;t t
544
+ s,@SRI_LM_FALSE@,#,;t t
545
+ s,@DAEMON_OBJ@,,;t t
546
+ s,@LIBOBJS@,,;t t
547
+ s,@LTLIBOBJS@,,;t t
548
+ CEOF
549
+
550
+ # Split the substitutions into bite-sized pieces for seds with
551
+ # small command number limits, like on Digital OSF/1 and HP-UX.
552
+ ac_max_sed_lines=48
553
+ ac_sed_frag=1 # Number of current file.
554
+ ac_beg=1 # First line for current file.
555
+ ac_end=$ac_max_sed_lines # Line after last line for current file.
556
+ ac_more_lines=:
557
+ ac_sed_cmds=
558
+ while $ac_more_lines; do
559
+ if test $ac_beg -gt 1; then
560
+ sed "1,${ac_beg}d; ${ac_end}q" $tmp/subs.sed >$tmp/subs.frag
561
+ else
562
+ sed "${ac_end}q" $tmp/subs.sed >$tmp/subs.frag
563
+ fi
564
+ if test ! -s $tmp/subs.frag; then
565
+ ac_more_lines=false
566
+ else
567
+ # The purpose of the label and of the branching condition is to
568
+ # speed up the sed processing (if there are no `@' at all, there
569
+ # is no need to browse any of the substitutions).
570
+ # These are the two extra sed commands mentioned above.
571
+ (echo ':t
572
+ /@[a-zA-Z_][a-zA-Z_0-9]*@/!b' && cat $tmp/subs.frag) >$tmp/subs-$ac_sed_frag.sed
573
+ if test -z "$ac_sed_cmds"; then
574
+ ac_sed_cmds="sed -f $tmp/subs-$ac_sed_frag.sed"
575
+ else
576
+ ac_sed_cmds="$ac_sed_cmds | sed -f $tmp/subs-$ac_sed_frag.sed"
577
+ fi
578
+ ac_sed_frag=`expr $ac_sed_frag + 1`
579
+ ac_beg=$ac_end
580
+ ac_end=`expr $ac_end + $ac_max_sed_lines`
581
+ fi
582
+ done
583
+ if test -z "$ac_sed_cmds"; then
584
+ ac_sed_cmds=cat
585
+ fi
586
+ fi # test -n "$CONFIG_FILES"
587
+
588
+ for ac_file in : $CONFIG_FILES; do test "x$ac_file" = x: && continue
589
+ # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in".
590
+ case $ac_file in
591
+ - | *:- | *:-:* ) # input from stdin
592
+ cat >$tmp/stdin
593
+ ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'`
594
+ ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;;
595
+ *:* ) ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'`
596
+ ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;;
597
+ * ) ac_file_in=$ac_file.in ;;
598
+ esac
599
+
600
+ # Compute @srcdir@, @top_srcdir@, and @INSTALL@ for subdirectories.
601
+ ac_dir=`(dirname "$ac_file") 2>/dev/null ||
602
+ $as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
603
+ X"$ac_file" : 'X\(//\)[^/]' \| \
604
+ X"$ac_file" : 'X\(//\)$' \| \
605
+ X"$ac_file" : 'X\(/\)' \| \
606
+ . : '\(.\)' 2>/dev/null ||
607
+ echo X"$ac_file" |
608
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
609
+ /^X\(\/\/\)[^/].*/{ s//\1/; q; }
610
+ /^X\(\/\/\)$/{ s//\1/; q; }
611
+ /^X\(\/\).*/{ s//\1/; q; }
612
+ s/.*/./; q'`
613
+ { if $as_mkdir_p; then
614
+ mkdir -p "$ac_dir"
615
+ else
616
+ as_dir="$ac_dir"
617
+ as_dirs=
618
+ while test ! -d "$as_dir"; do
619
+ as_dirs="$as_dir $as_dirs"
620
+ as_dir=`(dirname "$as_dir") 2>/dev/null ||
621
+ $as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
622
+ X"$as_dir" : 'X\(//\)[^/]' \| \
623
+ X"$as_dir" : 'X\(//\)$' \| \
624
+ X"$as_dir" : 'X\(/\)' \| \
625
+ . : '\(.\)' 2>/dev/null ||
626
+ echo X"$as_dir" |
627
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
628
+ /^X\(\/\/\)[^/].*/{ s//\1/; q; }
629
+ /^X\(\/\/\)$/{ s//\1/; q; }
630
+ /^X\(\/\).*/{ s//\1/; q; }
631
+ s/.*/./; q'`
632
+ done
633
+ test ! -n "$as_dirs" || mkdir $as_dirs
634
+ fi || { { echo "$as_me:$LINENO: error: cannot create directory \"$ac_dir\"" >&5
635
+ echo "$as_me: error: cannot create directory \"$ac_dir\"" >&2;}
636
+ { (exit 1); exit 1; }; }; }
637
+
638
+ ac_builddir=.
639
+
640
+ if test "$ac_dir" != .; then
641
+ ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'`
642
+ # A "../" for each directory in $ac_dir_suffix.
643
+ ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'`
644
+ else
645
+ ac_dir_suffix= ac_top_builddir=
646
+ fi
647
+
648
+ case $srcdir in
649
+ .) # No --srcdir option. We are building in place.
650
+ ac_srcdir=.
651
+ if test -z "$ac_top_builddir"; then
652
+ ac_top_srcdir=.
653
+ else
654
+ ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'`
655
+ fi ;;
656
+ [\\/]* | ?:[\\/]* ) # Absolute path.
657
+ ac_srcdir=$srcdir$ac_dir_suffix;
658
+ ac_top_srcdir=$srcdir ;;
659
+ *) # Relative path.
660
+ ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix
661
+ ac_top_srcdir=$ac_top_builddir$srcdir ;;
662
+ esac
663
+
664
+ # Do not use `cd foo && pwd` to compute absolute paths, because
665
+ # the directories may not exist.
666
+ case `pwd` in
667
+ .) ac_abs_builddir="$ac_dir";;
668
+ *)
669
+ case "$ac_dir" in
670
+ .) ac_abs_builddir=`pwd`;;
671
+ [\\/]* | ?:[\\/]* ) ac_abs_builddir="$ac_dir";;
672
+ *) ac_abs_builddir=`pwd`/"$ac_dir";;
673
+ esac;;
674
+ esac
675
+ case $ac_abs_builddir in
676
+ .) ac_abs_top_builddir=${ac_top_builddir}.;;
677
+ *)
678
+ case ${ac_top_builddir}. in
679
+ .) ac_abs_top_builddir=$ac_abs_builddir;;
680
+ [\\/]* | ?:[\\/]* ) ac_abs_top_builddir=${ac_top_builddir}.;;
681
+ *) ac_abs_top_builddir=$ac_abs_builddir/${ac_top_builddir}.;;
682
+ esac;;
683
+ esac
684
+ case $ac_abs_builddir in
685
+ .) ac_abs_srcdir=$ac_srcdir;;
686
+ *)
687
+ case $ac_srcdir in
688
+ .) ac_abs_srcdir=$ac_abs_builddir;;
689
+ [\\/]* | ?:[\\/]* ) ac_abs_srcdir=$ac_srcdir;;
690
+ *) ac_abs_srcdir=$ac_abs_builddir/$ac_srcdir;;
691
+ esac;;
692
+ esac
693
+ case $ac_abs_builddir in
694
+ .) ac_abs_top_srcdir=$ac_top_srcdir;;
695
+ *)
696
+ case $ac_top_srcdir in
697
+ .) ac_abs_top_srcdir=$ac_abs_builddir;;
698
+ [\\/]* | ?:[\\/]* ) ac_abs_top_srcdir=$ac_top_srcdir;;
699
+ *) ac_abs_top_srcdir=$ac_abs_builddir/$ac_top_srcdir;;
700
+ esac;;
701
+ esac
702
+
703
+
704
+ case $INSTALL in
705
+ [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;;
706
+ *) ac_INSTALL=$ac_top_builddir$INSTALL ;;
707
+ esac
708
+
709
+ if test x"$ac_file" != x-; then
710
+ { echo "$as_me:$LINENO: creating $ac_file" >&5
711
+ echo "$as_me: creating $ac_file" >&6;}
712
+ rm -f "$ac_file"
713
+ fi
714
+ # Let's still pretend it is `configure' which instantiates (i.e., don't
715
+ # use $as_me), people would be surprised to read:
716
+ # /* config.h. Generated by config.status. */
717
+ if test x"$ac_file" = x-; then
718
+ configure_input=
719
+ else
720
+ configure_input="$ac_file. "
721
+ fi
722
+ configure_input=$configure_input"Generated from `echo $ac_file_in |
723
+ sed 's,.*/,,'` by configure."
724
+
725
+ # First look for the input files in the build tree, otherwise in the
726
+ # src tree.
727
+ ac_file_inputs=`IFS=:
728
+ for f in $ac_file_in; do
729
+ case $f in
730
+ -) echo $tmp/stdin ;;
731
+ [\\/$]*)
732
+ # Absolute (can't be DOS-style, as IFS=:)
733
+ test -f "$f" || { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5
734
+ echo "$as_me: error: cannot find input file: $f" >&2;}
735
+ { (exit 1); exit 1; }; }
736
+ echo "$f";;
737
+ *) # Relative
738
+ if test -f "$f"; then
739
+ # Build tree
740
+ echo "$f"
741
+ elif test -f "$srcdir/$f"; then
742
+ # Source tree
743
+ echo "$srcdir/$f"
744
+ else
745
+ # /dev/null tree
746
+ { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5
747
+ echo "$as_me: error: cannot find input file: $f" >&2;}
748
+ { (exit 1); exit 1; }; }
749
+ fi;;
750
+ esac
751
+ done` || { (exit 1); exit 1; }
752
+ sed "/^[ ]*VPATH[ ]*=/{
753
+ s/:*\$(srcdir):*/:/;
754
+ s/:*\${srcdir}:*/:/;
755
+ s/:*@srcdir@:*/:/;
756
+ s/^\([^=]*=[ ]*\):*/\1/;
757
+ s/:*$//;
758
+ s/^[^=]*=[ ]*$//;
759
+ }
760
+
761
+ :t
762
+ /@[a-zA-Z_][a-zA-Z_0-9]*@/!b
763
+ s,@configure_input@,$configure_input,;t t
764
+ s,@srcdir@,$ac_srcdir,;t t
765
+ s,@abs_srcdir@,$ac_abs_srcdir,;t t
766
+ s,@top_srcdir@,$ac_top_srcdir,;t t
767
+ s,@abs_top_srcdir@,$ac_abs_top_srcdir,;t t
768
+ s,@builddir@,$ac_builddir,;t t
769
+ s,@abs_builddir@,$ac_abs_builddir,;t t
770
+ s,@top_builddir@,$ac_top_builddir,;t t
771
+ s,@abs_top_builddir@,$ac_abs_top_builddir,;t t
772
+ s,@INSTALL@,$ac_INSTALL,;t t
773
+ " $ac_file_inputs | (eval "$ac_sed_cmds") >$tmp/out
774
+ rm -f $tmp/stdin
775
+ if test x"$ac_file" != x-; then
776
+ mv $tmp/out $ac_file
777
+ else
778
+ cat $tmp/out
779
+ rm -f $tmp/out
780
+ fi
781
+
782
+ done
783
+
784
+ #
785
+ # CONFIG_HEADER section.
786
+ #
787
+
788
+ # These sed commands are passed to sed as "A NAME B NAME C VALUE D", where
789
+ # NAME is the cpp macro being defined and VALUE is the value it is being given.
790
+ #
791
+ # ac_d sets the value in "#define NAME VALUE" lines.
792
+ ac_dA='s,^\([ ]*\)#\([ ]*define[ ][ ]*\)'
793
+ ac_dB='[ ].*$,\1#\2'
794
+ ac_dC=' '
795
+ ac_dD=',;t'
796
+ # ac_u turns "#undef NAME" without trailing blanks into "#define NAME VALUE".
797
+ ac_uA='s,^\([ ]*\)#\([ ]*\)undef\([ ][ ]*\)'
798
+ ac_uB='$,\1#\2define\3'
799
+ ac_uC=' '
800
+ ac_uD=',;t'
801
+
802
+ for ac_file in : $CONFIG_HEADERS; do test "x$ac_file" = x: && continue
803
+ # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in".
804
+ case $ac_file in
805
+ - | *:- | *:-:* ) # input from stdin
806
+ cat >$tmp/stdin
807
+ ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'`
808
+ ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;;
809
+ *:* ) ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'`
810
+ ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;;
811
+ * ) ac_file_in=$ac_file.in ;;
812
+ esac
813
+
814
+ test x"$ac_file" != x- && { echo "$as_me:$LINENO: creating $ac_file" >&5
815
+ echo "$as_me: creating $ac_file" >&6;}
816
+
817
+ # First look for the input files in the build tree, otherwise in the
818
+ # src tree.
819
+ ac_file_inputs=`IFS=:
820
+ for f in $ac_file_in; do
821
+ case $f in
822
+ -) echo $tmp/stdin ;;
823
+ [\\/$]*)
824
+ # Absolute (can't be DOS-style, as IFS=:)
825
+ test -f "$f" || { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5
826
+ echo "$as_me: error: cannot find input file: $f" >&2;}
827
+ { (exit 1); exit 1; }; }
828
+ # Do quote $f, to prevent DOS paths from being IFS'd.
829
+ echo "$f";;
830
+ *) # Relative
831
+ if test -f "$f"; then
832
+ # Build tree
833
+ echo "$f"
834
+ elif test -f "$srcdir/$f"; then
835
+ # Source tree
836
+ echo "$srcdir/$f"
837
+ else
838
+ # /dev/null tree
839
+ { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5
840
+ echo "$as_me: error: cannot find input file: $f" >&2;}
841
+ { (exit 1); exit 1; }; }
842
+ fi;;
843
+ esac
844
+ done` || { (exit 1); exit 1; }
845
+ # Remove the trailing spaces.
846
+ sed 's/[ ]*$//' $ac_file_inputs >$tmp/in
847
+
848
+ # Handle all the #define templates only if necessary.
849
+ if grep "^[ ]*#[ ]*define" $tmp/in >/dev/null; then
850
+ # If there are no defines, we may have an empty if/fi
851
+ :
852
+ cat >$tmp/defines.sed <<CEOF
853
+ /^[ ]*#[ ]*define/!b
854
+ t clr
855
+ : clr
856
+ ${ac_dA}PACKAGE_NAME${ac_dB}PACKAGE_NAME${ac_dC}"lmserver"${ac_dD}
857
+ ${ac_dA}PACKAGE_TARNAME${ac_dB}PACKAGE_TARNAME${ac_dC}"lmserver"${ac_dD}
858
+ ${ac_dA}PACKAGE_VERSION${ac_dB}PACKAGE_VERSION${ac_dC}"1.0"${ac_dD}
859
+ ${ac_dA}PACKAGE_STRING${ac_dB}PACKAGE_STRING${ac_dC}"lmserver 1.0"${ac_dD}
860
+ ${ac_dA}PACKAGE_BUGREPORT${ac_dB}PACKAGE_BUGREPORT${ac_dC}""${ac_dD}
861
+ ${ac_dA}PACKAGE${ac_dB}PACKAGE${ac_dC}"lmserver"${ac_dD}
862
+ ${ac_dA}VERSION${ac_dB}VERSION${ac_dC}"1.0"${ac_dD}
863
+ ${ac_dA}STDC_HEADERS${ac_dB}STDC_HEADERS${ac_dC}1${ac_dD}
864
+ ${ac_dA}HAVE_SYS_TYPES_H${ac_dB}HAVE_SYS_TYPES_H${ac_dC}1${ac_dD}
865
+ ${ac_dA}HAVE_SYS_STAT_H${ac_dB}HAVE_SYS_STAT_H${ac_dC}1${ac_dD}
866
+ ${ac_dA}HAVE_STDLIB_H${ac_dB}HAVE_STDLIB_H${ac_dC}1${ac_dD}
867
+ ${ac_dA}HAVE_STRING_H${ac_dB}HAVE_STRING_H${ac_dC}1${ac_dD}
868
+ ${ac_dA}HAVE_MEMORY_H${ac_dB}HAVE_MEMORY_H${ac_dC}1${ac_dD}
869
+ ${ac_dA}HAVE_STRINGS_H${ac_dB}HAVE_STRINGS_H${ac_dC}1${ac_dD}
870
+ ${ac_dA}HAVE_INTTYPES_H${ac_dB}HAVE_INTTYPES_H${ac_dC}1${ac_dD}
871
+ ${ac_dA}HAVE_STDINT_H${ac_dB}HAVE_STDINT_H${ac_dC}1${ac_dD}
872
+ ${ac_dA}HAVE_UNISTD_H${ac_dB}HAVE_UNISTD_H${ac_dC}1${ac_dD}
873
+ ${ac_dA}HAVE_SRILM${ac_dB}HAVE_SRILM${ac_dC}${ac_dD}
874
+ ${ac_dA}HAVE_DAEMON${ac_dB}HAVE_DAEMON${ac_dC}${ac_dD}
875
+ ${ac_dA}HAVE__BOOL${ac_dB}HAVE__BOOL${ac_dC}1${ac_dD}
876
+ ${ac_dA}HAVE_STDBOOL_H${ac_dB}HAVE_STDBOOL_H${ac_dC}1${ac_dD}
877
+ ${ac_dA}HAVE_MALLOC_H${ac_dB}HAVE_MALLOC_H${ac_dC}${ac_dD}
878
+ ${ac_dA}HAVE_STRUCT_MALLINFO${ac_dB}HAVE_STRUCT_MALLINFO${ac_dC}${ac_dD}
879
+ ${ac_dA}ENDIAN_LITTLE${ac_dB}ENDIAN_LITTLE${ac_dC}1${ac_dD}
880
+ ${ac_dA}HAVE_MLOCKALL${ac_dB}HAVE_MLOCKALL${ac_dC}1${ac_dD}
881
+ CEOF
882
+ sed -f $tmp/defines.sed $tmp/in >$tmp/out
883
+ rm -f $tmp/in
884
+ mv $tmp/out $tmp/in
885
+
886
+ fi # grep
887
+
888
+ # Handle all the #undef templates
889
+ cat >$tmp/undefs.sed <<CEOF
890
+ /^[ ]*#[ ]*undef/!b
891
+ t clr
892
+ : clr
893
+ ${ac_uA}PACKAGE_NAME${ac_uB}PACKAGE_NAME${ac_uC}"lmserver"${ac_uD}
894
+ ${ac_uA}PACKAGE_TARNAME${ac_uB}PACKAGE_TARNAME${ac_uC}"lmserver"${ac_uD}
895
+ ${ac_uA}PACKAGE_VERSION${ac_uB}PACKAGE_VERSION${ac_uC}"1.0"${ac_uD}
896
+ ${ac_uA}PACKAGE_STRING${ac_uB}PACKAGE_STRING${ac_uC}"lmserver 1.0"${ac_uD}
897
+ ${ac_uA}PACKAGE_BUGREPORT${ac_uB}PACKAGE_BUGREPORT${ac_uC}""${ac_uD}
898
+ ${ac_uA}PACKAGE${ac_uB}PACKAGE${ac_uC}"lmserver"${ac_uD}
899
+ ${ac_uA}VERSION${ac_uB}VERSION${ac_uC}"1.0"${ac_uD}
900
+ ${ac_uA}STDC_HEADERS${ac_uB}STDC_HEADERS${ac_uC}1${ac_uD}
901
+ ${ac_uA}HAVE_SYS_TYPES_H${ac_uB}HAVE_SYS_TYPES_H${ac_uC}1${ac_uD}
902
+ ${ac_uA}HAVE_SYS_STAT_H${ac_uB}HAVE_SYS_STAT_H${ac_uC}1${ac_uD}
903
+ ${ac_uA}HAVE_STDLIB_H${ac_uB}HAVE_STDLIB_H${ac_uC}1${ac_uD}
904
+ ${ac_uA}HAVE_STRING_H${ac_uB}HAVE_STRING_H${ac_uC}1${ac_uD}
905
+ ${ac_uA}HAVE_MEMORY_H${ac_uB}HAVE_MEMORY_H${ac_uC}1${ac_uD}
906
+ ${ac_uA}HAVE_STRINGS_H${ac_uB}HAVE_STRINGS_H${ac_uC}1${ac_uD}
907
+ ${ac_uA}HAVE_INTTYPES_H${ac_uB}HAVE_INTTYPES_H${ac_uC}1${ac_uD}
908
+ ${ac_uA}HAVE_STDINT_H${ac_uB}HAVE_STDINT_H${ac_uC}1${ac_uD}
909
+ ${ac_uA}HAVE_UNISTD_H${ac_uB}HAVE_UNISTD_H${ac_uC}1${ac_uD}
910
+ ${ac_uA}HAVE_SRILM${ac_uB}HAVE_SRILM${ac_uC}${ac_uD}
911
+ ${ac_uA}HAVE_DAEMON${ac_uB}HAVE_DAEMON${ac_uC}${ac_uD}
912
+ ${ac_uA}HAVE__BOOL${ac_uB}HAVE__BOOL${ac_uC}1${ac_uD}
913
+ ${ac_uA}HAVE_STDBOOL_H${ac_uB}HAVE_STDBOOL_H${ac_uC}1${ac_uD}
914
+ ${ac_uA}HAVE_MALLOC_H${ac_uB}HAVE_MALLOC_H${ac_uC}${ac_uD}
915
+ ${ac_uA}HAVE_STRUCT_MALLINFO${ac_uB}HAVE_STRUCT_MALLINFO${ac_uC}${ac_uD}
916
+ ${ac_uA}ENDIAN_LITTLE${ac_uB}ENDIAN_LITTLE${ac_uC}1${ac_uD}
917
+ ${ac_uA}HAVE_MLOCKALL${ac_uB}HAVE_MLOCKALL${ac_uC}1${ac_uD}
918
+ s,^[ ]*#[ ]*undef[ ][ ]*[a-zA-Z_][a-zA-Z_0-9]*,/* & */,
919
+ CEOF
920
+ sed -f $tmp/undefs.sed $tmp/in >$tmp/out
921
+ rm -f $tmp/in
922
+ mv $tmp/out $tmp/in
923
+
924
+ # Let's still pretend it is `configure' which instantiates (i.e., don't
925
+ # use $as_me), people would be surprised to read:
926
+ # /* config.h. Generated by config.status. */
927
+ if test x"$ac_file" = x-; then
928
+ echo "/* Generated by configure. */" >$tmp/config.h
929
+ else
930
+ echo "/* $ac_file. Generated by configure. */" >$tmp/config.h
931
+ fi
932
+ cat $tmp/in >>$tmp/config.h
933
+ rm -f $tmp/in
934
+ if test x"$ac_file" != x-; then
935
+ if diff $ac_file $tmp/config.h >/dev/null 2>&1; then
936
+ { echo "$as_me:$LINENO: $ac_file is unchanged" >&5
937
+ echo "$as_me: $ac_file is unchanged" >&6;}
938
+ else
939
+ ac_dir=`(dirname "$ac_file") 2>/dev/null ||
940
+ $as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
941
+ X"$ac_file" : 'X\(//\)[^/]' \| \
942
+ X"$ac_file" : 'X\(//\)$' \| \
943
+ X"$ac_file" : 'X\(/\)' \| \
944
+ . : '\(.\)' 2>/dev/null ||
945
+ echo X"$ac_file" |
946
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
947
+ /^X\(\/\/\)[^/].*/{ s//\1/; q; }
948
+ /^X\(\/\/\)$/{ s//\1/; q; }
949
+ /^X\(\/\).*/{ s//\1/; q; }
950
+ s/.*/./; q'`
951
+ { if $as_mkdir_p; then
952
+ mkdir -p "$ac_dir"
953
+ else
954
+ as_dir="$ac_dir"
955
+ as_dirs=
956
+ while test ! -d "$as_dir"; do
957
+ as_dirs="$as_dir $as_dirs"
958
+ as_dir=`(dirname "$as_dir") 2>/dev/null ||
959
+ $as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
960
+ X"$as_dir" : 'X\(//\)[^/]' \| \
961
+ X"$as_dir" : 'X\(//\)$' \| \
962
+ X"$as_dir" : 'X\(/\)' \| \
963
+ . : '\(.\)' 2>/dev/null ||
964
+ echo X"$as_dir" |
965
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
966
+ /^X\(\/\/\)[^/].*/{ s//\1/; q; }
967
+ /^X\(\/\/\)$/{ s//\1/; q; }
968
+ /^X\(\/\).*/{ s//\1/; q; }
969
+ s/.*/./; q'`
970
+ done
971
+ test ! -n "$as_dirs" || mkdir $as_dirs
972
+ fi || { { echo "$as_me:$LINENO: error: cannot create directory \"$ac_dir\"" >&5
973
+ echo "$as_me: error: cannot create directory \"$ac_dir\"" >&2;}
974
+ { (exit 1); exit 1; }; }; }
975
+
976
+ rm -f $ac_file
977
+ mv $tmp/config.h $ac_file
978
+ fi
979
+ else
980
+ cat $tmp/config.h
981
+ rm -f $tmp/config.h
982
+ fi
983
+ # Compute $ac_file's index in $config_headers.
984
+ _am_stamp_count=1
985
+ for _am_header in $config_headers :; do
986
+ case $_am_header in
987
+ $ac_file | $ac_file:* )
988
+ break ;;
989
+ * )
990
+ _am_stamp_count=`expr $_am_stamp_count + 1` ;;
991
+ esac
992
+ done
993
+ echo "timestamp for $ac_file" >`(dirname $ac_file) 2>/dev/null ||
994
+ $as_expr X$ac_file : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
995
+ X$ac_file : 'X\(//\)[^/]' \| \
996
+ X$ac_file : 'X\(//\)$' \| \
997
+ X$ac_file : 'X\(/\)' \| \
998
+ . : '\(.\)' 2>/dev/null ||
999
+ echo X$ac_file |
1000
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
1001
+ /^X\(\/\/\)[^/].*/{ s//\1/; q; }
1002
+ /^X\(\/\/\)$/{ s//\1/; q; }
1003
+ /^X\(\/\).*/{ s//\1/; q; }
1004
+ s/.*/./; q'`/stamp-h$_am_stamp_count
1005
+ done
1006
+
1007
+ #
1008
+ # CONFIG_COMMANDS section.
1009
+ #
1010
+ for ac_file in : $CONFIG_COMMANDS; do test "x$ac_file" = x: && continue
1011
+ ac_dest=`echo "$ac_file" | sed 's,:.*,,'`
1012
+ ac_source=`echo "$ac_file" | sed 's,[^:]*:,,'`
1013
+ ac_dir=`(dirname "$ac_dest") 2>/dev/null ||
1014
+ $as_expr X"$ac_dest" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
1015
+ X"$ac_dest" : 'X\(//\)[^/]' \| \
1016
+ X"$ac_dest" : 'X\(//\)$' \| \
1017
+ X"$ac_dest" : 'X\(/\)' \| \
1018
+ . : '\(.\)' 2>/dev/null ||
1019
+ echo X"$ac_dest" |
1020
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
1021
+ /^X\(\/\/\)[^/].*/{ s//\1/; q; }
1022
+ /^X\(\/\/\)$/{ s//\1/; q; }
1023
+ /^X\(\/\).*/{ s//\1/; q; }
1024
+ s/.*/./; q'`
1025
+ { if $as_mkdir_p; then
1026
+ mkdir -p "$ac_dir"
1027
+ else
1028
+ as_dir="$ac_dir"
1029
+ as_dirs=
1030
+ while test ! -d "$as_dir"; do
1031
+ as_dirs="$as_dir $as_dirs"
1032
+ as_dir=`(dirname "$as_dir") 2>/dev/null ||
1033
+ $as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
1034
+ X"$as_dir" : 'X\(//\)[^/]' \| \
1035
+ X"$as_dir" : 'X\(//\)$' \| \
1036
+ X"$as_dir" : 'X\(/\)' \| \
1037
+ . : '\(.\)' 2>/dev/null ||
1038
+ echo X"$as_dir" |
1039
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
1040
+ /^X\(\/\/\)[^/].*/{ s//\1/; q; }
1041
+ /^X\(\/\/\)$/{ s//\1/; q; }
1042
+ /^X\(\/\).*/{ s//\1/; q; }
1043
+ s/.*/./; q'`
1044
+ done
1045
+ test ! -n "$as_dirs" || mkdir $as_dirs
1046
+ fi || { { echo "$as_me:$LINENO: error: cannot create directory \"$ac_dir\"" >&5
1047
+ echo "$as_me: error: cannot create directory \"$ac_dir\"" >&2;}
1048
+ { (exit 1); exit 1; }; }; }
1049
+
1050
+ ac_builddir=.
1051
+
1052
+ if test "$ac_dir" != .; then
1053
+ ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'`
1054
+ # A "../" for each directory in $ac_dir_suffix.
1055
+ ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'`
1056
+ else
1057
+ ac_dir_suffix= ac_top_builddir=
1058
+ fi
1059
+
1060
+ case $srcdir in
1061
+ .) # No --srcdir option. We are building in place.
1062
+ ac_srcdir=.
1063
+ if test -z "$ac_top_builddir"; then
1064
+ ac_top_srcdir=.
1065
+ else
1066
+ ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'`
1067
+ fi ;;
1068
+ [\\/]* | ?:[\\/]* ) # Absolute path.
1069
+ ac_srcdir=$srcdir$ac_dir_suffix;
1070
+ ac_top_srcdir=$srcdir ;;
1071
+ *) # Relative path.
1072
+ ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix
1073
+ ac_top_srcdir=$ac_top_builddir$srcdir ;;
1074
+ esac
1075
+
1076
+ # Do not use `cd foo && pwd` to compute absolute paths, because
1077
+ # the directories may not exist.
1078
+ case `pwd` in
1079
+ .) ac_abs_builddir="$ac_dir";;
1080
+ *)
1081
+ case "$ac_dir" in
1082
+ .) ac_abs_builddir=`pwd`;;
1083
+ [\\/]* | ?:[\\/]* ) ac_abs_builddir="$ac_dir";;
1084
+ *) ac_abs_builddir=`pwd`/"$ac_dir";;
1085
+ esac;;
1086
+ esac
1087
+ case $ac_abs_builddir in
1088
+ .) ac_abs_top_builddir=${ac_top_builddir}.;;
1089
+ *)
1090
+ case ${ac_top_builddir}. in
1091
+ .) ac_abs_top_builddir=$ac_abs_builddir;;
1092
+ [\\/]* | ?:[\\/]* ) ac_abs_top_builddir=${ac_top_builddir}.;;
1093
+ *) ac_abs_top_builddir=$ac_abs_builddir/${ac_top_builddir}.;;
1094
+ esac;;
1095
+ esac
1096
+ case $ac_abs_builddir in
1097
+ .) ac_abs_srcdir=$ac_srcdir;;
1098
+ *)
1099
+ case $ac_srcdir in
1100
+ .) ac_abs_srcdir=$ac_abs_builddir;;
1101
+ [\\/]* | ?:[\\/]* ) ac_abs_srcdir=$ac_srcdir;;
1102
+ *) ac_abs_srcdir=$ac_abs_builddir/$ac_srcdir;;
1103
+ esac;;
1104
+ esac
1105
+ case $ac_abs_builddir in
1106
+ .) ac_abs_top_srcdir=$ac_top_srcdir;;
1107
+ *)
1108
+ case $ac_top_srcdir in
1109
+ .) ac_abs_top_srcdir=$ac_abs_builddir;;
1110
+ [\\/]* | ?:[\\/]* ) ac_abs_top_srcdir=$ac_top_srcdir;;
1111
+ *) ac_abs_top_srcdir=$ac_abs_builddir/$ac_top_srcdir;;
1112
+ esac;;
1113
+ esac
1114
+
1115
+
1116
+ { echo "$as_me:$LINENO: executing $ac_dest commands" >&5
1117
+ echo "$as_me: executing $ac_dest commands" >&6;}
1118
+ case $ac_dest in
1119
+ depfiles ) test x"$AMDEP_TRUE" != x"" || for mf in $CONFIG_FILES; do
1120
+ # Strip MF so we end up with the name of the file.
1121
+ mf=`echo "$mf" | sed -e 's/:.*$//'`
1122
+ # Check whether this is an Automake generated Makefile or not.
1123
+ # We used to match only the files named `Makefile.in', but
1124
+ # some people rename them; so instead we look at the file content.
1125
+ # Grep'ing the first line is not enough: some people post-process
1126
+ # each Makefile.in and add a new line on top of each file to say so.
1127
+ # So let's grep the whole file.
1128
+ if grep '^#.*generated by automake' $mf > /dev/null 2>&1; then
1129
+ dirpart=`(dirname "$mf") 2>/dev/null ||
1130
+ $as_expr X"$mf" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
1131
+ X"$mf" : 'X\(//\)[^/]' \| \
1132
+ X"$mf" : 'X\(//\)$' \| \
1133
+ X"$mf" : 'X\(/\)' \| \
1134
+ . : '\(.\)' 2>/dev/null ||
1135
+ echo X"$mf" |
1136
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
1137
+ /^X\(\/\/\)[^/].*/{ s//\1/; q; }
1138
+ /^X\(\/\/\)$/{ s//\1/; q; }
1139
+ /^X\(\/\).*/{ s//\1/; q; }
1140
+ s/.*/./; q'`
1141
+ else
1142
+ continue
1143
+ fi
1144
+ # Extract the definitions of DEPDIR, am__include, and am__quote from the
1145
+ # Makefile without running `make'; skip it if DEPDIR or am__include is unset.
1146
+ DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"`
1147
+ test -z "$DEPDIR" && continue
1148
+ am__include=`sed -n 's/^am__include = //p' < "$mf"`
1149
+ test -z "$am__include" && continue # test the variable's value, not a literal string
1150
+ am__quote=`sed -n 's/^am__quote = //p' < "$mf"`
1151
+ # When using ansi2knr, U may be empty or an underscore; expand it
1152
+ U=`sed -n 's/^U = //p' < "$mf"`
1153
+ # Find all dependency output files, they are included files with
1154
+ # $(DEPDIR) in their names. We invoke sed twice because it is the
1155
+ # simplest approach to changing $(DEPDIR) to its actual value in the
1156
+ # expansion.
1157
+ for file in `sed -n "
1158
+ s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \
1159
+ sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g' -e 's/\$U/'"$U"'/g'`; do
1160
+ # Make sure the directory exists.
1161
+ test -f "$dirpart/$file" && continue
1162
+ fdir=`(dirname "$file") 2>/dev/null ||
1163
+ $as_expr X"$file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
1164
+ X"$file" : 'X\(//\)[^/]' \| \
1165
+ X"$file" : 'X\(//\)$' \| \
1166
+ X"$file" : 'X\(/\)' \| \
1167
+ . : '\(.\)' 2>/dev/null ||
1168
+ echo X"$file" |
1169
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
1170
+ /^X\(\/\/\)[^/].*/{ s//\1/; q; }
1171
+ /^X\(\/\/\)$/{ s//\1/; q; }
1172
+ /^X\(\/\).*/{ s//\1/; q; }
1173
+ s/.*/./; q'`
1174
+ { if $as_mkdir_p; then
1175
+ mkdir -p $dirpart/$fdir
1176
+ else
1177
+ as_dir=$dirpart/$fdir
1178
+ as_dirs=
1179
+ while test ! -d "$as_dir"; do
1180
+ as_dirs="$as_dir $as_dirs"
1181
+ as_dir=`(dirname "$as_dir") 2>/dev/null ||
1182
+ $as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
1183
+ X"$as_dir" : 'X\(//\)[^/]' \| \
1184
+ X"$as_dir" : 'X\(//\)$' \| \
1185
+ X"$as_dir" : 'X\(/\)' \| \
1186
+ . : '\(.\)' 2>/dev/null ||
1187
+ echo X"$as_dir" |
1188
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
1189
+ /^X\(\/\/\)[^/].*/{ s//\1/; q; }
1190
+ /^X\(\/\/\)$/{ s//\1/; q; }
1191
+ /^X\(\/\).*/{ s//\1/; q; }
1192
+ s/.*/./; q'`
1193
+ done
1194
+ test ! -n "$as_dirs" || mkdir $as_dirs
1195
+ fi || { { echo "$as_me:$LINENO: error: cannot create directory $dirpart/$fdir" >&5
1196
+ echo "$as_me: error: cannot create directory $dirpart/$fdir" >&2;}
1197
+ { (exit 1); exit 1; }; }; }
1198
+
1199
+ # echo "creating $dirpart/$file"
1200
+ echo '# dummy' > "$dirpart/$file"
1201
+ done
1202
+ done
1203
+ ;;
1204
+ esac
1205
+ done
1206
+
1207
+ { (exit 0); exit 0; }
mosesdecoder/contrib/lmserver/config.sub ADDED
@@ -0,0 +1,1676 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #! /bin/sh
2
+ # Configuration validation subroutine script.
3
+ # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4
+ # 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
5
+ # Free Software Foundation, Inc.
6
+
7
+ timestamp='2008-01-16'
8
+
9
+ # This file is (in principle) common to ALL GNU software.
10
+ # The presence of a machine in this file suggests that SOME GNU software
11
+ # can handle that machine. It does not imply ALL GNU software can.
12
+ #
13
+ # This file is free software; you can redistribute it and/or modify
14
+ # it under the terms of the GNU General Public License as published by
15
+ # the Free Software Foundation; either version 2 of the License, or
16
+ # (at your option) any later version.
17
+ #
18
+ # This program is distributed in the hope that it will be useful,
19
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
20
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21
+ # GNU General Public License for more details.
22
+ #
23
+ # You should have received a copy of the GNU General Public License
24
+ # along with this program; if not, write to the Free Software
25
+ # Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
26
+ # 02110-1301, USA.
27
+ #
28
+ # As a special exception to the GNU General Public License, if you
29
+ # distribute this file as part of a program that contains a
30
+ # configuration script generated by Autoconf, you may include it under
31
+ # the same distribution terms that you use for the rest of that program.
32
+
33
+
34
+ # Please send patches to <config-patches@gnu.org>. Submit a context
35
+ # diff and a properly formatted ChangeLog entry.
36
+ #
37
+ # Configuration subroutine to validate and canonicalize a configuration type.
38
+ # Supply the specified configuration type as an argument.
39
+ # If it is invalid, we print an error message on stderr and exit with code 1.
40
+ # Otherwise, we print the canonical config type on stdout and succeed.
41
+
42
+ # This file is supposed to be the same for all GNU packages
43
+ # and recognize all the CPU types, system types and aliases
44
+ # that are meaningful with *any* GNU software.
45
+ # Each package is responsible for reporting which valid configurations
46
+ # it does not support. The user should be able to distinguish
47
+ # a failure to support a valid configuration from a meaningless
48
+ # configuration.
49
+
50
+ # The goal of this file is to map all the various variations of a given
51
+ # machine specification into a single specification in the form:
52
+ # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM
53
+ # or in some cases, the newer four-part form:
54
+ # CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM
55
+ # It is wrong to echo any other type of specification.
56
+
57
+ me=`echo "$0" | sed -e 's,.*/,,'`
58
+
59
+ usage="\
60
+ Usage: $0 [OPTION] CPU-MFR-OPSYS
61
+ $0 [OPTION] ALIAS
62
+
63
+ Canonicalize a configuration name.
64
+
65
+ Operation modes:
66
+ -h, --help print this help, then exit
67
+ -t, --time-stamp print date of last modification, then exit
68
+ -v, --version print version number, then exit
69
+
70
+ Report bugs and patches to <config-patches@gnu.org>."
71
+
72
+ version="\
73
+ GNU config.sub ($timestamp)
74
+
75
+ Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
76
+ 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
77
+
78
+ This is free software; see the source for copying conditions. There is NO
79
+ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
80
+
81
+ help="
82
+ Try \`$me --help' for more information."
83
+
84
+ # Parse command line.
+ # --time-stamp, --version and --help print their text and exit.  `--' is
+ # dropped and ends option parsing; a lone `-' ends option parsing and stays
+ # in $1.  Any other word starting with `-' is rejected: the diagnostic goes
+ # to stderr (like every other error in this script) and the exit status is 1.
+ # A word containing `local' is echoed back unchanged and the script exits.
+ # Anything else ends the loop with the word still in $1.
85
+ while test $# -gt 0 ; do
86
+ case $1 in
87
+ --time-stamp | --time* | -t )
88
+ echo "$timestamp" ; exit ;;
89
+ --version | -v )
90
+ echo "$version" ; exit ;;
91
+ --help | --h* | -h )
92
+ echo "$usage"; exit ;;
93
+ -- ) # Stop option processing
94
+ shift; break ;;
95
+ - ) # Use stdin as input.
96
+ break ;;
97
+ -* )
98
+ echo "$me: invalid option $1$help" >&2
99
+ exit 1 ;;
100
+
101
+ *local*)
102
+ # First pass through any local machine types.
+ # Quoted so the name is printed verbatim (no word splitting or globbing).
103
+ echo "$1"
104
+ exit ;;
105
+
106
+ * )
107
+ break ;;
108
+ esac
109
+ done
110
+
111
+ case $# in
112
+ 0) echo "$me: missing argument$help" >&2
113
+ exit 1;;
114
+ 1) ;;
115
+ *) echo "$me: too many arguments$help" >&2
116
+ exit 1;;
117
+ esac
118
+
119
+ # Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any).
120
+ # Here we must recognize all the valid KERNEL-OS combinations.
+ # maybe_os is the last two dash-separated fields of $1.  If it names a known
+ # KERNEL-OS pair, os becomes that pair and basic_machine is everything before
+ # it; otherwise only the last field is taken as the OS, and os is left empty
+ # when $1 contains no dash at all.  $1 is quoted throughout so that it is
+ # never word-split or pathname-expanded.
121
+ maybe_os=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
122
+ case $maybe_os in
123
+ nto-qnx* | linux-gnu* | linux-dietlibc | linux-newlib* | linux-uclibc* | \
124
+ uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* | \
125
+ storm-chaos* | os2-emx* | rtmk-nova*)
126
+ os=-$maybe_os
127
+ basic_machine=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
128
+ ;;
129
+ *)
130
+ basic_machine=`echo "$1" | sed 's/-[^-]*$//'`
+ # The x prefix keeps `test' from mistaking an operand for an operator.
131
+ if test "x$basic_machine" != "x$1"
132
+ then os=`echo "$1" | sed 's/.*-/-/'`
133
+ else os=; fi
134
+ ;;
135
+ esac
136
+
137
+ ### Let's recognize common machines as not being operating systems so
138
+ ### that things like config.sub decstation-3100 work. We also
139
+ ### recognize some manufacturers as not being operating systems, so we
140
+ ### can provide default operating systems below.
141
+ # Fix up the provisional $os obtained from the split above.  The first
+ # matching arm wins, so the order of the arms is significant.
+ case $os in
142
+ -sun*os*)
143
+ # Prevent following clause from handling this invalid input.
144
+ ;;
145
+ # The last field is a machine or manufacturer name, not an operating
+ # system: clear os and treat the whole argument as the machine.
+ -dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \
146
+ -att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \
147
+ -unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \
148
+ -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\
149
+ -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \
150
+ -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \
151
+ -apple | -axis | -knuth | -cray)
152
+ os=
153
+ basic_machine=$1
154
+ ;;
155
+ -sim | -cisco | -oki | -wec | -winbond)
156
+ os=
157
+ basic_machine=$1
158
+ ;;
159
+ # Matched only so that no later arm applies; os and basic_machine are
+ # left exactly as the split produced them.
+ -scout)
160
+ ;;
161
+ # A trailing `wrs' field selects os=-vxworks; the whole argument becomes
+ # the machine.
+ -wrs)
162
+ os=-vxworks
163
+ basic_machine=$1
164
+ ;;
165
+ -chorusos*)
166
+ os=-chorusos
167
+ basic_machine=$1
168
+ ;;
169
+ -chorusrdb)
170
+ os=-chorusrdb
171
+ basic_machine=$1
172
+ ;;
173
+ -hiux*)
174
+ os=-hiuxwe2
175
+ ;;
176
+ # SCO arms: each one normalizes the os spelling (where it assigns os) and
+ # rewrites the argument from its first `86-' to the end as `86-pc' to form
+ # the machine.  The specific arms must stay ahead of the -sco* catch-all.
+ -sco6)
177
+ os=-sco5v6
178
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
179
+ ;;
180
+ -sco5)
181
+ os=-sco3.2v5
182
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
183
+ ;;
184
+ -sco4)
185
+ os=-sco3.2v4
186
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
187
+ ;;
188
+ # Dotted spelling (e.g. -sco3.2.4) is rewritten to the `v' spelling
+ # (-sco3.2v4).
+ -sco3.2.[4-9]*)
189
+ os=`echo $os | sed -e 's/sco3.2./sco3.2v/'`
190
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
191
+ ;;
192
+ -sco3.2v[4-9]*)
193
+ # Don't forget version if it is 3.2v4 or newer.
194
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
195
+ ;;
196
+ -sco5v6*)
197
+ # Keep the sco5v6 os string as given; only the machine is rewritten.
198
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
199
+ ;;
200
+ -sco*)
201
+ os=-sco3.2v2
202
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
203
+ ;;
204
+ -udk*)
205
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
206
+ ;;
207
+ # A bare `isc' defaults to -isc2.2; longer -isc* spellings are handled by
+ # a later arm, which keeps os as given.
+ -isc)
208
+ os=-isc2.2
209
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
210
+ ;;
211
+ -clix*)
212
+ basic_machine=clipper-intergraph
213
+ ;;
214
+ -isc*)
215
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
216
+ ;;
217
+ -lynx*)
218
+ os=-lynxos
219
+ ;;
220
+ # Same rewrite as the SCO arms, but the company becomes `sequent'.
+ -ptx*)
221
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'`
222
+ ;;
223
+ -windowsnt*)
224
+ os=`echo $os | sed -e 's/windowsnt/winnt/'`
225
+ ;;
226
+ -psos*)
227
+ os=-psos
228
+ ;;
229
+ # Any -mint spelling forces the machine to m68k-atari and the os to -mint.
+ -mint | -mint[0-9]*)
230
+ basic_machine=m68k-atari
231
+ os=-mint
232
+ ;;
233
+ esac
234
+
235
+ # Decode aliases for certain CPU-COMPANY combinations.
236
+ case $basic_machine in
237
+ # Recognize the basic CPU types without company name.
238
+ # Some are omitted here because they have special meanings below.
239
+ 1750a | 580 \
240
+ | a29k \
241
+ | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \
242
+ | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
243
+ | am33_2.0 \
244
+ | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \
245
+ | bfin \
246
+ | c4x | clipper \
247
+ | d10v | d30v | dlx | dsp16xx | dvp \
248
+ | fido | fr30 | frv \
249
+ | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
250
+ | i370 | i860 | i960 | ia64 \
251
+ | ip2k | iq2000 \
252
+ | m32c | m32r | m32rle | m68000 | m68k | m88k \
253
+ | maxq | mb | microblaze | mcore | mep \
254
+ | mips | mipsbe | mipseb | mipsel | mipsle \
255
+ | mips16 \
256
+ | mips64 | mips64el \
257
+ | mips64vr | mips64vrel \
258
+ | mips64orion | mips64orionel \
259
+ | mips64vr4100 | mips64vr4100el \
260
+ | mips64vr4300 | mips64vr4300el \
261
+ | mips64vr5000 | mips64vr5000el \
262
+ | mips64vr5900 | mips64vr5900el \
263
+ | mipsisa32 | mipsisa32el \
264
+ | mipsisa32r2 | mipsisa32r2el \
265
+ | mipsisa64 | mipsisa64el \
266
+ | mipsisa64r2 | mipsisa64r2el \
267
+ | mipsisa64sb1 | mipsisa64sb1el \
268
+ | mipsisa64sr71k | mipsisa64sr71kel \
269
+ | mipstx39 | mipstx39el \
270
+ | mn10200 | mn10300 \
271
+ | mt \
272
+ | msp430 \
273
+ | nios | nios2 \
274
+ | ns16k | ns32k \
275
+ | or32 \
276
+ | pdp10 | pdp11 | pj | pjl \
277
+ | powerpc | powerpc64 | powerpc64le | powerpcle | ppcbe \
278
+ | pyramid \
279
+ | score \
280
+ | sh | sh[1234] | sh[24]a | sh[24]a*eb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \
281
+ | sh64 | sh64le \
282
+ | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \
283
+ | sparcv8 | sparcv9 | sparcv9b | sparcv9v \
284
+ | spu | strongarm \
285
+ | tahoe | thumb | tic4x | tic80 | tron \
286
+ | v850 | v850e \
287
+ | we32k \
288
+ | x86 | xc16x | xscale | xscalee[bl] | xstormy16 | xtensa \
289
+ | z8k)
290
+ basic_machine=$basic_machine-unknown
291
+ ;;
292
+ m6811 | m68hc11 | m6812 | m68hc12)
293
+ # Motorola 68HC11/12.
294
+ basic_machine=$basic_machine-unknown
295
+ os=-none
296
+ ;;
297
+ m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k)
298
+ ;;
299
+ ms1)
300
+ basic_machine=mt-unknown
301
+ ;;
302
+
303
+ # We use `pc' rather than `unknown'
304
+ # because (1) that's what they normally are, and
305
+ # (2) the word "unknown" tends to confuse beginning users.
306
+ i*86 | x86_64)
307
+ basic_machine=$basic_machine-pc
308
+ ;;
309
+ # Object if more than one company name word.
310
+ *-*-*)
311
+ echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2
312
+ exit 1
313
+ ;;
314
+ # Recognize the basic CPU types with company name.
315
+ 580-* \
316
+ | a29k-* \
317
+ | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \
318
+ | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
319
+ | alphapca5[67]-* | alpha64pca5[67]-* | arc-* \
320
+ | arm-* | armbe-* | armle-* | armeb-* | armv*-* \
321
+ | avr-* | avr32-* \
322
+ | bfin-* | bs2000-* \
323
+ | c[123]* | c30-* | [cjt]90-* | c4x-* | c54x-* | c55x-* | c6x-* \
324
+ | clipper-* | craynv-* | cydra-* \
325
+ | d10v-* | d30v-* | dlx-* \
326
+ | elxsi-* \
327
+ | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \
328
+ | h8300-* | h8500-* \
329
+ | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \
330
+ | i*86-* | i860-* | i960-* | ia64-* \
331
+ | ip2k-* | iq2000-* \
332
+ | m32c-* | m32r-* | m32rle-* \
333
+ | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \
334
+ | m88110-* | m88k-* | maxq-* | mcore-* \
335
+ | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \
336
+ | mips16-* \
337
+ | mips64-* | mips64el-* \
338
+ | mips64vr-* | mips64vrel-* \
339
+ | mips64orion-* | mips64orionel-* \
340
+ | mips64vr4100-* | mips64vr4100el-* \
341
+ | mips64vr4300-* | mips64vr4300el-* \
342
+ | mips64vr5000-* | mips64vr5000el-* \
343
+ | mips64vr5900-* | mips64vr5900el-* \
344
+ | mipsisa32-* | mipsisa32el-* \
345
+ | mipsisa32r2-* | mipsisa32r2el-* \
346
+ | mipsisa64-* | mipsisa64el-* \
347
+ | mipsisa64r2-* | mipsisa64r2el-* \
348
+ | mipsisa64sb1-* | mipsisa64sb1el-* \
349
+ | mipsisa64sr71k-* | mipsisa64sr71kel-* \
350
+ | mipstx39-* | mipstx39el-* \
351
+ | mmix-* \
352
+ | mt-* \
353
+ | msp430-* \
354
+ | nios-* | nios2-* \
355
+ | none-* | np1-* | ns16k-* | ns32k-* \
356
+ | orion-* \
357
+ | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \
358
+ | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \
359
+ | pyramid-* \
360
+ | romp-* | rs6000-* \
361
+ | sh-* | sh[1234]-* | sh[24]a-* | sh[24]a*eb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \
362
+ | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \
363
+ | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \
364
+ | sparclite-* \
365
+ | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | strongarm-* | sv1-* | sx?-* \
366
+ | tahoe-* | thumb-* \
367
+ | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \
368
+ | tron-* \
369
+ | v850-* | v850e-* | vax-* \
370
+ | we32k-* \
371
+ | x86-* | x86_64-* | xc16x-* | xps100-* | xscale-* | xscalee[bl]-* \
372
+ | xstormy16-* | xtensa*-* \
373
+ | ymp-* \
374
+ | z8k-*)
375
+ ;;
376
+ # Recognize the basic CPU types without company name, with glob match.
377
+ xtensa*)
378
+ basic_machine=$basic_machine-unknown
379
+ ;;
380
+ # Recognize the various machine names and aliases which stand
381
+ # for a CPU type and a company and sometimes even an OS.
382
+ 386bsd)
383
+ basic_machine=i386-unknown
384
+ os=-bsd
385
+ ;;
386
+ 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc)
387
+ basic_machine=m68000-att
388
+ ;;
389
+ 3b*)
390
+ basic_machine=we32k-att
391
+ ;;
392
+ a29khif)
393
+ basic_machine=a29k-amd
394
+ os=-udi
395
+ ;;
396
+ abacus)
397
+ basic_machine=abacus-unknown
398
+ ;;
399
+ adobe68k)
400
+ basic_machine=m68010-adobe
401
+ os=-scout
402
+ ;;
403
+ alliant | fx80)
404
+ basic_machine=fx80-alliant
405
+ ;;
406
+ altos | altos3068)
407
+ basic_machine=m68k-altos
408
+ ;;
409
+ am29k)
410
+ basic_machine=a29k-none
411
+ os=-bsd
412
+ ;;
413
+ amd64)
414
+ basic_machine=x86_64-pc
415
+ ;;
416
+ amd64-*)
417
+ basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'`
418
+ ;;
419
+ amdahl)
420
+ basic_machine=580-amdahl
421
+ os=-sysv
422
+ ;;
423
+ amiga | amiga-*)
424
+ basic_machine=m68k-unknown
425
+ ;;
426
+ amigaos | amigados)
427
+ basic_machine=m68k-unknown
428
+ os=-amigaos
429
+ ;;
430
+ amigaunix | amix)
431
+ basic_machine=m68k-unknown
432
+ os=-sysv4
433
+ ;;
434
+ apollo68)
435
+ basic_machine=m68k-apollo
436
+ os=-sysv
437
+ ;;
438
+ apollo68bsd)
439
+ basic_machine=m68k-apollo
440
+ os=-bsd
441
+ ;;
442
+ aux)
443
+ basic_machine=m68k-apple
444
+ os=-aux
445
+ ;;
446
+ balance)
447
+ basic_machine=ns32k-sequent
448
+ os=-dynix
449
+ ;;
450
+ blackfin)
451
+ basic_machine=bfin-unknown
452
+ os=-linux
453
+ ;;
454
+ blackfin-*)
455
+ basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'`
456
+ os=-linux
457
+ ;;
458
+ c90)
459
+ basic_machine=c90-cray
460
+ os=-unicos
461
+ ;;
462
+ convex-c1)
463
+ basic_machine=c1-convex
464
+ os=-bsd
465
+ ;;
466
+ convex-c2)
467
+ basic_machine=c2-convex
468
+ os=-bsd
469
+ ;;
470
+ convex-c32)
471
+ basic_machine=c32-convex
472
+ os=-bsd
473
+ ;;
474
+ convex-c34)
475
+ basic_machine=c34-convex
476
+ os=-bsd
477
+ ;;
478
+ convex-c38)
479
+ basic_machine=c38-convex
480
+ os=-bsd
481
+ ;;
482
+ cray | j90)
483
+ basic_machine=j90-cray
484
+ os=-unicos
485
+ ;;
486
+ craynv)
487
+ basic_machine=craynv-cray
488
+ os=-unicosmp
489
+ ;;
490
+ cr16)
491
+ basic_machine=cr16-unknown
492
+ os=-elf
493
+ ;;
494
+ crds | unos)
495
+ basic_machine=m68k-crds
496
+ ;;
497
+ crisv32 | crisv32-* | etraxfs*)
498
+ basic_machine=crisv32-axis
499
+ ;;
500
+ cris | cris-* | etrax*)
501
+ basic_machine=cris-axis
502
+ ;;
503
+ crx)
504
+ basic_machine=crx-unknown
505
+ os=-elf
506
+ ;;
507
+ da30 | da30-*)
508
+ basic_machine=m68k-da30
509
+ ;;
510
+ decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn)
511
+ basic_machine=mips-dec
512
+ ;;
513
+ decsystem10* | dec10*)
514
+ basic_machine=pdp10-dec
515
+ os=-tops10
516
+ ;;
517
+ decsystem20* | dec20*)
518
+ basic_machine=pdp10-dec
519
+ os=-tops20
520
+ ;;
521
+ delta | 3300 | motorola-3300 | motorola-delta \
522
+ | 3300-motorola | delta-motorola)
523
+ basic_machine=m68k-motorola
524
+ ;;
525
+ delta88)
526
+ basic_machine=m88k-motorola
527
+ os=-sysv3
528
+ ;;
529
+ djgpp)
530
+ basic_machine=i586-pc
531
+ os=-msdosdjgpp
532
+ ;;
533
+ dpx20 | dpx20-*)
534
+ basic_machine=rs6000-bull
535
+ os=-bosx
536
+ ;;
537
+ dpx2* | dpx2*-bull)
538
+ basic_machine=m68k-bull
539
+ os=-sysv3
540
+ ;;
541
+ ebmon29k)
542
+ basic_machine=a29k-amd
543
+ os=-ebmon
544
+ ;;
545
+ elxsi)
546
+ basic_machine=elxsi-elxsi
547
+ os=-bsd
548
+ ;;
549
+ encore | umax | mmax)
550
+ basic_machine=ns32k-encore
551
+ ;;
552
+ es1800 | OSE68k | ose68k | ose | OSE)
553
+ basic_machine=m68k-ericsson
554
+ os=-ose
555
+ ;;
556
+ fx2800)
557
+ basic_machine=i860-alliant
558
+ ;;
559
+ genix)
560
+ basic_machine=ns32k-ns
561
+ ;;
562
+ gmicro)
563
+ basic_machine=tron-gmicro
564
+ os=-sysv
565
+ ;;
566
+ go32)
567
+ basic_machine=i386-pc
568
+ os=-go32
569
+ ;;
570
+ h3050r* | hiux*)
571
+ basic_machine=hppa1.1-hitachi
572
+ os=-hiuxwe2
573
+ ;;
574
+ h8300hms)
575
+ basic_machine=h8300-hitachi
576
+ os=-hms
577
+ ;;
578
+ h8300xray)
579
+ basic_machine=h8300-hitachi
580
+ os=-xray
581
+ ;;
582
+ h8500hms)
583
+ basic_machine=h8500-hitachi
584
+ os=-hms
585
+ ;;
586
+ harris)
587
+ basic_machine=m88k-harris
588
+ os=-sysv3
589
+ ;;
590
+ hp300-*)
591
+ basic_machine=m68k-hp
592
+ ;;
593
+ hp300bsd)
594
+ basic_machine=m68k-hp
595
+ os=-bsd
596
+ ;;
597
+ hp300hpux)
598
+ basic_machine=m68k-hp
599
+ os=-hpux
600
+ ;;
601
+ hp3k9[0-9][0-9] | hp9[0-9][0-9])
602
+ basic_machine=hppa1.0-hp
603
+ ;;
604
+ hp9k2[0-9][0-9] | hp9k31[0-9])
605
+ basic_machine=m68000-hp
606
+ ;;
607
+ hp9k3[2-9][0-9])
608
+ basic_machine=m68k-hp
609
+ ;;
610
+ hp9k6[0-9][0-9] | hp6[0-9][0-9])
611
+ basic_machine=hppa1.0-hp
612
+ ;;
613
+ hp9k7[0-79][0-9] | hp7[0-79][0-9])
614
+ basic_machine=hppa1.1-hp
615
+ ;;
616
+ hp9k78[0-9] | hp78[0-9])
617
+ # FIXME: really hppa2.0-hp
618
+ basic_machine=hppa1.1-hp
619
+ ;;
620
+ hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893)
621
+ # FIXME: really hppa2.0-hp
622
+ basic_machine=hppa1.1-hp
623
+ ;;
624
+ hp9k8[0-9][13679] | hp8[0-9][13679])
625
+ basic_machine=hppa1.1-hp
626
+ ;;
627
+ hp9k8[0-9][0-9] | hp8[0-9][0-9])
628
+ basic_machine=hppa1.0-hp
629
+ ;;
630
+ hppa-next)
631
+ os=-nextstep3
632
+ ;;
633
+ hppaosf)
634
+ basic_machine=hppa1.1-hp
635
+ os=-osf
636
+ ;;
637
+ hppro)
638
+ basic_machine=hppa1.1-hp
639
+ os=-proelf
640
+ ;;
641
+ i370-ibm* | ibm*)
642
+ basic_machine=i370-ibm
643
+ ;;
644
+ # I'm not sure what "Sysv32" means. Should this be sysv3.2?
645
+ i*86v32)
646
+ basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
647
+ os=-sysv32
648
+ ;;
649
+ i*86v4*)
650
+ basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
651
+ os=-sysv4
652
+ ;;
653
+ i*86v)
654
+ basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
655
+ os=-sysv
656
+ ;;
657
+ i*86sol2)
658
+ basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
659
+ os=-solaris2
660
+ ;;
661
+ i386mach)
662
+ basic_machine=i386-mach
663
+ os=-mach
664
+ ;;
665
+ i386-vsta | vsta)
666
+ basic_machine=i386-unknown
667
+ os=-vsta
668
+ ;;
669
+ iris | iris4d)
670
+ basic_machine=mips-sgi
671
+ case $os in
672
+ -irix*)
673
+ ;;
674
+ *)
675
+ os=-irix4
676
+ ;;
677
+ esac
678
+ ;;
679
+ isi68 | isi)
680
+ basic_machine=m68k-isi
681
+ os=-sysv
682
+ ;;
683
+ m68knommu)
684
+ basic_machine=m68k-unknown
685
+ os=-linux
686
+ ;;
687
+ m68knommu-*)
688
+ basic_machine=m68k-`echo $basic_machine | sed 's/^[^-]*-//'`
689
+ os=-linux
690
+ ;;
691
+ m88k-omron*)
692
+ basic_machine=m88k-omron
693
+ ;;
694
+ magnum | m3230)
695
+ basic_machine=mips-mips
696
+ os=-sysv
697
+ ;;
698
+ merlin)
699
+ basic_machine=ns32k-utek
700
+ os=-sysv
701
+ ;;
702
+ mingw32)
703
+ basic_machine=i386-pc
704
+ os=-mingw32
705
+ ;;
706
+ mingw32ce)
707
+ basic_machine=arm-unknown
708
+ os=-mingw32ce
709
+ ;;
710
+ miniframe)
711
+ basic_machine=m68000-convergent
712
+ ;;
713
+ *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*)
714
+ basic_machine=m68k-atari
715
+ os=-mint
716
+ ;;
717
+ mipsEE* | ee | ps2)
718
+ basic_machine=mips64r5900el-scei
719
+ case $os in
720
+ -linux*)
721
+ ;;
722
+ *)
723
+ os=-elf
724
+ ;;
725
+ esac
726
+ ;;
727
+ iop)
728
+ basic_machine=mipsel-scei
729
+ os=-irx
730
+ ;;
731
+ dvp)
732
+ basic_machine=dvp-scei
733
+ os=-elf
734
+ ;;
735
+ mips3*-*)
736
+ basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`
737
+ ;;
738
+ mips3*)
739
+ basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown
740
+ ;;
741
+ monitor)
742
+ basic_machine=m68k-rom68k
743
+ os=-coff
744
+ ;;
745
+ morphos)
746
+ basic_machine=powerpc-unknown
747
+ os=-morphos
748
+ ;;
749
+ msdos)
750
+ basic_machine=i386-pc
751
+ os=-msdos
752
+ ;;
753
+ ms1-*)
754
+ basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'`
755
+ ;;
756
+ mvs)
757
+ basic_machine=i370-ibm
758
+ os=-mvs
759
+ ;;
760
+ ncr3000)
761
+ basic_machine=i486-ncr
762
+ os=-sysv4
763
+ ;;
764
+ netbsd386)
765
+ basic_machine=i386-unknown
766
+ os=-netbsd
767
+ ;;
768
+ netwinder)
769
+ basic_machine=armv4l-rebel
770
+ os=-linux
771
+ ;;
772
+ news | news700 | news800 | news900)
773
+ basic_machine=m68k-sony
774
+ os=-newsos
775
+ ;;
776
+ news1000)
777
+ basic_machine=m68030-sony
778
+ os=-newsos
779
+ ;;
780
+ news-3600 | risc-news)
781
+ basic_machine=mips-sony
782
+ os=-newsos
783
+ ;;
784
+ necv70)
785
+ basic_machine=v70-nec
786
+ os=-sysv
787
+ ;;
788
+ next | m*-next )
789
+ basic_machine=m68k-next
790
+ case $os in
791
+ -nextstep* )
792
+ ;;
793
+ -ns2*)
794
+ os=-nextstep2
795
+ ;;
796
+ *)
797
+ os=-nextstep3
798
+ ;;
799
+ esac
800
+ ;;
801
+ nh3000)
802
+ basic_machine=m68k-harris
803
+ os=-cxux
804
+ ;;
805
+ nh[45]000)
806
+ basic_machine=m88k-harris
807
+ os=-cxux
808
+ ;;
809
+ nindy960)
810
+ basic_machine=i960-intel
811
+ os=-nindy
812
+ ;;
813
+ mon960)
814
+ basic_machine=i960-intel
815
+ os=-mon960
816
+ ;;
817
+ nonstopux)
818
+ basic_machine=mips-compaq
819
+ os=-nonstopux
820
+ ;;
821
+ np1)
822
+ basic_machine=np1-gould
823
+ ;;
824
+ nsr-tandem)
825
+ basic_machine=nsr-tandem
826
+ ;;
827
+ op50n-* | op60c-*)
828
+ basic_machine=hppa1.1-oki
829
+ os=-proelf
830
+ ;;
831
+ openrisc | openrisc-*)
832
+ basic_machine=or32-unknown
833
+ ;;
834
+ os400)
835
+ basic_machine=powerpc-ibm
836
+ os=-os400
837
+ ;;
838
+ OSE68000 | ose68000)
839
+ basic_machine=m68000-ericsson
840
+ os=-ose
841
+ ;;
842
+ os68k)
843
+ basic_machine=m68k-none
844
+ os=-os68k
845
+ ;;
846
+ pa-hitachi)
847
+ basic_machine=hppa1.1-hitachi
848
+ os=-hiuxwe2
849
+ ;;
850
+ paragon)
851
+ basic_machine=i860-intel
852
+ os=-osf
853
+ ;;
854
+ parisc)
855
+ basic_machine=hppa-unknown
856
+ os=-linux
857
+ ;;
858
+ parisc-*)
859
+ basic_machine=hppa-`echo $basic_machine | sed 's/^[^-]*-//'`
860
+ os=-linux
861
+ ;;
862
+ pbd)
863
+ basic_machine=sparc-tti
864
+ ;;
865
+ pbb)
866
+ basic_machine=m68k-tti
867
+ ;;
868
+ pc532 | pc532-*)
869
+ basic_machine=ns32k-pc532
870
+ ;;
871
+ pc98)
872
+ basic_machine=i386-pc
873
+ ;;
874
+ pc98-*)
875
+ basic_machine=i386-`echo $basic_machine | sed 's/^[^-]*-//'`
876
+ ;;
877
+ pentium | p5 | k5 | k6 | nexgen | viac3)
878
+ basic_machine=i586-pc
879
+ ;;
880
+ pentiumpro | p6 | 6x86 | athlon | athlon_*)
881
+ basic_machine=i686-pc
882
+ ;;
883
+ pentiumii | pentium2 | pentiumiii | pentium3)
884
+ basic_machine=i686-pc
885
+ ;;
886
+ pentium4)
887
+ basic_machine=i786-pc
888
+ ;;
889
+ pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*)
890
+ basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'`
891
+ ;;
892
+ pentiumpro-* | p6-* | 6x86-* | athlon-*)
893
+ basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'`
894
+ ;;
895
+ pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*)
896
+ basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'`
897
+ ;;
898
+ pentium4-*)
899
+ basic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'`
900
+ ;;
901
+ pn)
902
+ basic_machine=pn-gould
903
+ ;;
904
+ power) basic_machine=power-ibm
905
+ ;;
906
+ ppc) basic_machine=powerpc-unknown
907
+ ;;
908
+ ppc-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
909
+ ;;
910
+ ppcle | powerpclittle | ppc-le | powerpc-little)
911
+ basic_machine=powerpcle-unknown
912
+ ;;
913
+ ppcle-* | powerpclittle-*)
914
+ basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'`
915
+ ;;
916
+ ppc64) basic_machine=powerpc64-unknown
917
+ ;;
918
+ ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'`
919
+ ;;
920
+ ppc64le | powerpc64little | ppc64-le | powerpc64-little)
921
+ basic_machine=powerpc64le-unknown
922
+ ;;
923
+ ppc64le-* | powerpc64little-*)
924
+ basic_machine=powerpc64le-`echo $basic_machine | sed 's/^[^-]*-//'`
925
+ ;;
926
+ ps2)
927
+ basic_machine=i386-ibm
928
+ ;;
929
+ pw32)
930
+ basic_machine=i586-unknown
931
+ os=-pw32
932
+ ;;
933
+ rdos)
934
+ basic_machine=i386-pc
935
+ os=-rdos
936
+ ;;
937
+ rom68k)
938
+ basic_machine=m68k-rom68k
939
+ os=-coff
940
+ ;;
941
+ rm[46]00)
942
+ basic_machine=mips-siemens
943
+ ;;
944
+ rtpc | rtpc-*)
945
+ basic_machine=romp-ibm
946
+ ;;
947
+ s390 | s390-*)
948
+ basic_machine=s390-ibm
949
+ ;;
950
+ s390x | s390x-*)
951
+ basic_machine=s390x-ibm
952
+ ;;
953
+ sa29200)
954
+ basic_machine=a29k-amd
955
+ os=-udi
956
+ ;;
957
+ sb1)
958
+ basic_machine=mipsisa64sb1-unknown
959
+ ;;
960
+ sb1el)
961
+ basic_machine=mipsisa64sb1el-unknown
962
+ ;;
963
+ sde)
964
+ basic_machine=mipsisa32-sde
965
+ os=-elf
966
+ ;;
967
+ sei)
968
+ basic_machine=mips-sei
969
+ os=-seiux
970
+ ;;
971
+ sequent)
972
+ basic_machine=i386-sequent
973
+ ;;
974
+ sh)
975
+ basic_machine=sh-hitachi
976
+ os=-hms
977
+ ;;
978
+ sh5el)
979
+ basic_machine=sh5le-unknown
980
+ ;;
981
+ sh64)
982
+ basic_machine=sh64-unknown
983
+ ;;
984
+ sparclite-wrs | simso-wrs)
985
+ basic_machine=sparclite-wrs
986
+ os=-vxworks
987
+ ;;
988
+ sps7)
989
+ basic_machine=m68k-bull
990
+ os=-sysv2
991
+ ;;
992
+ spur)
993
+ basic_machine=spur-unknown
994
+ ;;
995
+ st2000)
996
+ basic_machine=m68k-tandem
997
+ ;;
998
+ stratus)
999
+ basic_machine=i860-stratus
1000
+ os=-sysv4
1001
+ ;;
1002
+ sun2)
1003
+ basic_machine=m68000-sun
1004
+ ;;
1005
+ sun2os3)
1006
+ basic_machine=m68000-sun
1007
+ os=-sunos3
1008
+ ;;
1009
+ sun2os4)
1010
+ basic_machine=m68000-sun
1011
+ os=-sunos4
1012
+ ;;
1013
+ sun3os3)
1014
+ basic_machine=m68k-sun
1015
+ os=-sunos3
1016
+ ;;
1017
+ sun3os4)
1018
+ basic_machine=m68k-sun
1019
+ os=-sunos4
1020
+ ;;
1021
+ sun4os3)
1022
+ basic_machine=sparc-sun
1023
+ os=-sunos3
1024
+ ;;
1025
+ sun4os4)
1026
+ basic_machine=sparc-sun
1027
+ os=-sunos4
1028
+ ;;
1029
+ sun4sol2)
1030
+ basic_machine=sparc-sun
1031
+ os=-solaris2
1032
+ ;;
1033
+ sun3 | sun3-*)
1034
+ basic_machine=m68k-sun
1035
+ ;;
1036
+ sun4)
1037
+ basic_machine=sparc-sun
1038
+ ;;
1039
+ sun386 | sun386i | roadrunner)
1040
+ basic_machine=i386-sun
1041
+ ;;
1042
+ sv1)
1043
+ basic_machine=sv1-cray
1044
+ os=-unicos
1045
+ ;;
1046
+ symmetry)
1047
+ basic_machine=i386-sequent
1048
+ os=-dynix
1049
+ ;;
1050
+ t3e)
1051
+ basic_machine=alphaev5-cray
1052
+ os=-unicos
1053
+ ;;
1054
+ t90)
1055
+ basic_machine=t90-cray
1056
+ os=-unicos
1057
+ ;;
1058
+ tic54x | c54x*)
1059
+ basic_machine=tic54x-unknown
1060
+ os=-coff
1061
+ ;;
1062
+ tic55x | c55x*)
1063
+ basic_machine=tic55x-unknown
1064
+ os=-coff
1065
+ ;;
1066
+ tic6x | c6x*)
1067
+ basic_machine=tic6x-unknown
1068
+ os=-coff
1069
+ ;;
1070
+ tile*)
1071
+ basic_machine=tile-unknown
1072
+ os=-linux-gnu
1073
+ ;;
1074
+ tx39)
1075
+ basic_machine=mipstx39-unknown
1076
+ ;;
1077
+ tx39el)
1078
+ basic_machine=mipstx39el-unknown
1079
+ ;;
1080
+ toad1)
1081
+ basic_machine=pdp10-xkl
1082
+ os=-tops20
1083
+ ;;
1084
+ tower | tower-32)
1085
+ basic_machine=m68k-ncr
1086
+ ;;
1087
+ tpf)
1088
+ basic_machine=s390x-ibm
1089
+ os=-tpf
1090
+ ;;
1091
+ udi29k)
1092
+ basic_machine=a29k-amd
1093
+ os=-udi
1094
+ ;;
1095
+ ultra3)
1096
+ basic_machine=a29k-nyu
1097
+ os=-sym1
1098
+ ;;
1099
+ v810 | necv810)
1100
+ basic_machine=v810-nec
1101
+ os=-none
1102
+ ;;
1103
+ vaxv)
1104
+ basic_machine=vax-dec
1105
+ os=-sysv
1106
+ ;;
1107
+ vms)
1108
+ basic_machine=vax-dec
1109
+ os=-vms
1110
+ ;;
1111
+ vpp*|vx|vx-*)
1112
+ basic_machine=f301-fujitsu
1113
+ ;;
1114
+ vxworks960)
1115
+ basic_machine=i960-wrs
1116
+ os=-vxworks
1117
+ ;;
1118
+ vxworks68)
1119
+ basic_machine=m68k-wrs
1120
+ os=-vxworks
1121
+ ;;
1122
+ vxworks29k)
1123
+ basic_machine=a29k-wrs
1124
+ os=-vxworks
1125
+ ;;
1126
+ w65*)
1127
+ basic_machine=w65-wdc
1128
+ os=-none
1129
+ ;;
1130
+ w89k-*)
1131
+ basic_machine=hppa1.1-winbond
1132
+ os=-proelf
1133
+ ;;
1134
+ xbox)
1135
+ basic_machine=i686-pc
1136
+ os=-mingw32
1137
+ ;;
1138
+ xps | xps100)
1139
+ basic_machine=xps100-honeywell
1140
+ ;;
1141
+ ymp)
1142
+ basic_machine=ymp-cray
1143
+ os=-unicos
1144
+ ;;
1145
+ z8k-*-coff)
1146
+ basic_machine=z8k-unknown
1147
+ os=-sim
1148
+ ;;
1149
+ none)
1150
+ basic_machine=none-none
1151
+ os=-none
1152
+ ;;
1153
+
1154
+ # Here we handle the default manufacturer of certain CPU types. It is in
1155
+ # some cases the only manufacturer, in others, it is the most popular.
1156
+ w89k)
1157
+ basic_machine=hppa1.1-winbond
1158
+ ;;
1159
+ op50n)
1160
+ basic_machine=hppa1.1-oki
1161
+ ;;
1162
+ op60c)
1163
+ basic_machine=hppa1.1-oki
1164
+ ;;
1165
+ romp)
1166
+ basic_machine=romp-ibm
1167
+ ;;
1168
+ mmix)
1169
+ basic_machine=mmix-knuth
1170
+ ;;
1171
+ rs6000)
1172
+ basic_machine=rs6000-ibm
1173
+ ;;
1174
+ vax)
1175
+ basic_machine=vax-dec
1176
+ ;;
1177
+ pdp10)
1178
+ # there are many clones, so DEC is not a safe bet
1179
+ basic_machine=pdp10-unknown
1180
+ ;;
1181
+ pdp11)
1182
+ basic_machine=pdp11-dec
1183
+ ;;
1184
+ we32k)
1185
+ basic_machine=we32k-att
1186
+ ;;
1187
+ sh[1234] | sh[24]a | sh[34]eb | sh[1234]le | sh[23]ele)
1188
+ basic_machine=sh-unknown
1189
+ ;;
1190
+ sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v)
1191
+ basic_machine=sparc-sun
1192
+ ;;
1193
+ cydra)
1194
+ basic_machine=cydra-cydrome
1195
+ ;;
1196
+ orion)
1197
+ basic_machine=orion-highlevel
1198
+ ;;
1199
+ orion105)
1200
+ basic_machine=clipper-highlevel
1201
+ ;;
1202
+ mac | mpw | mac-mpw)
1203
+ basic_machine=m68k-apple
1204
+ ;;
1205
+ pmac | pmac-mpw)
1206
+ basic_machine=powerpc-apple
1207
+ ;;
1208
+ *-unknown)
1209
+ # Make sure to match an already-canonicalized machine name.
1210
+ ;;
1211
+ *)
1212
+ echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2
1213
+ exit 1
1214
+ ;;
1215
+ esac
1216
+
1217
+ # Here we canonicalize certain aliases for manufacturers.
1218
+ case $basic_machine in
1219
+ *-digital*)
1220
+ basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'`
1221
+ ;;
1222
+ *-commodore*)
1223
+ basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'`
1224
+ ;;
1225
+ *)
1226
+ ;;
1227
+ esac
1228
+
1229
+ # Decode manufacturer-specific aliases for certain operating systems.
1230
+
1231
+ if [ x"$os" != x"" ]
1232
+ then
1233
+ case $os in
1234
+ # First match some system type aliases
1235
+ # that might get confused with valid system types.
1236
+ # -solaris* is a basic system type, with this one exception.
1237
+ -solaris1 | -solaris1.*)
1238
+ os=`echo $os | sed -e 's|solaris1|sunos4|'`
1239
+ ;;
1240
+ -solaris)
1241
+ os=-solaris2
1242
+ ;;
1243
+ -svr4*)
1244
+ os=-sysv4
1245
+ ;;
1246
+ -unixware*)
1247
+ os=-sysv4.2uw
1248
+ ;;
1249
+ -gnu/linux*)
1250
+ os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'`
1251
+ ;;
1252
+ # First accept the basic system types.
1253
+ # The portable systems come first.
1254
+ # Each alternative MUST END IN A *, to match a version number.
1255
+ # -sysv* is not here because it comes later, after sysvr4.
1256
+ -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \
1257
+ | -*vms* | -sco* | -esix* | -isc* | -aix* | -sunos | -sunos[34]*\
1258
+ | -hpux* | -unos* | -osf* | -luna* | -dgux* | -solaris* | -sym* \
1259
+ | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \
1260
+ | -aos* \
1261
+ | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
1262
+ | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \
1263
+ | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \
1264
+ | -openbsd* | -solidbsd* \
1265
+ | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \
1266
+ | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \
1267
+ | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \
1268
+ | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
1269
+ | -chorusos* | -chorusrdb* \
1270
+ | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
1271
+ | -mingw32* | -linux-gnu* | -linux-newlib* | -linux-uclibc* \
1272
+ | -uxpv* | -beos* | -mpeix* | -udk* \
1273
+ | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \
1274
+ | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
1275
+ | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \
1276
+ | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \
1277
+ | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \
1278
+ | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \
1279
+ | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -irx*)
1280
+ # Remember, each alternative MUST END IN *, to match a version number.
1281
+ ;;
1282
+ -qnx*)
1283
+ case $basic_machine in
1284
+ x86-* | i*86-*)
1285
+ ;;
1286
+ *)
1287
+ os=-nto$os
1288
+ ;;
1289
+ esac
1290
+ ;;
1291
+ -nto-qnx*)
1292
+ ;;
1293
+ -nto*)
1294
+ os=`echo $os | sed -e 's|nto|nto-qnx|'`
1295
+ ;;
1296
+ -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \
1297
+ | -windows* | -osx | -abug | -netware* | -os9* | -beos* | -haiku* \
1298
+ | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*)
1299
+ ;;
1300
+ -mac*)
1301
+ os=`echo $os | sed -e 's|mac|macos|'`
1302
+ ;;
1303
+ -linux-dietlibc)
1304
+ os=-linux-dietlibc
1305
+ ;;
1306
+ -linux*)
1307
+ os=`echo $os | sed -e 's|linux|linux-gnu|'`
1308
+ ;;
1309
+ -sunos5*)
1310
+ os=`echo $os | sed -e 's|sunos5|solaris2|'`
1311
+ ;;
1312
+ -sunos6*)
1313
+ os=`echo $os | sed -e 's|sunos6|solaris3|'`
1314
+ ;;
1315
+ -opened*)
1316
+ os=-openedition
1317
+ ;;
1318
+ -os400*)
1319
+ os=-os400
1320
+ ;;
1321
+ -wince*)
1322
+ os=-wince
1323
+ ;;
1324
+ -osfrose*)
1325
+ os=-osfrose
1326
+ ;;
1327
+ -osf*)
1328
+ os=-osf
1329
+ ;;
1330
+ -utek*)
1331
+ os=-bsd
1332
+ ;;
1333
+ -dynix*)
1334
+ os=-bsd
1335
+ ;;
1336
+ -acis*)
1337
+ os=-aos
1338
+ ;;
1339
+ -atheos*)
1340
+ os=-atheos
1341
+ ;;
1342
+ -syllable*)
1343
+ os=-syllable
1344
+ ;;
1345
+ -386bsd)
1346
+ os=-bsd
1347
+ ;;
1348
+ -ctix* | -uts*)
1349
+ os=-sysv
1350
+ ;;
1351
+ -nova*)
1352
+ os=-rtmk-nova
1353
+ ;;
1354
+ -ns2 )
1355
+ os=-nextstep2
1356
+ ;;
1357
+ -nsk*)
1358
+ os=-nsk
1359
+ ;;
1360
+ # Preserve the version number of sinix5.
1361
+ -sinix5.*)
1362
+ os=`echo $os | sed -e 's|sinix|sysv|'`
1363
+ ;;
1364
+ -sinix*)
1365
+ os=-sysv4
1366
+ ;;
1367
+ -tpf*)
1368
+ os=-tpf
1369
+ ;;
1370
+ -triton*)
1371
+ os=-sysv3
1372
+ ;;
1373
+ -oss*)
1374
+ os=-sysv3
1375
+ ;;
1376
+ -svr4)
1377
+ os=-sysv4
1378
+ ;;
1379
+ -svr3)
1380
+ os=-sysv3
1381
+ ;;
1382
+ -sysvr4)
1383
+ os=-sysv4
1384
+ ;;
1385
+ # This must come after -sysvr4.
1386
+ -sysv*)
1387
+ ;;
1388
+ -ose*)
1389
+ os=-ose
1390
+ ;;
1391
+ -es1800*)
1392
+ os=-ose
1393
+ ;;
1394
+ -xenix)
1395
+ os=-xenix
1396
+ ;;
1397
+ -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*)
1398
+ os=-mint
1399
+ ;;
1400
+ -aros*)
1401
+ os=-aros
1402
+ ;;
1403
+ -kaos*)
1404
+ os=-kaos
1405
+ ;;
1406
+ -zvmoe)
1407
+ os=-zvmoe
1408
+ ;;
1409
+ -none)
1410
+ ;;
1411
+ *)
1412
+ # Get rid of the `-' at the beginning of $os.
1413
+ os=`echo $os | sed 's/[^-]*-//'`
1414
+ echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2
1415
+ exit 1
1416
+ ;;
1417
+ esac
1418
+ else
1419
+
1420
+ # Here we handle the default operating systems that come with various machines.
1421
+ # The value should be what the vendor currently ships out the door with their
1422
+ # machine or put another way, the most popular os provided with the machine.
1423
+
1424
+ # Note that if you're going to try to match "-MANUFACTURER" here (say,
1425
+ # "-sun"), then you have to tell the case statement up towards the top
1426
+ # that MANUFACTURER isn't an operating system. Otherwise, code above
1427
+ # will signal an error saying that MANUFACTURER isn't an operating
1428
+ # system, and we'll never get to this point.
1429
+
1430
+ case $basic_machine in
1431
+ score-*)
1432
+ os=-elf
1433
+ ;;
1434
+ spu-*)
1435
+ os=-elf
1436
+ ;;
1437
+ *-acorn)
1438
+ os=-riscix1.2
1439
+ ;;
1440
+ arm*-rebel)
1441
+ os=-linux
1442
+ ;;
1443
+ arm*-semi)
1444
+ os=-aout
1445
+ ;;
1446
+ c4x-* | tic4x-*)
1447
+ os=-coff
1448
+ ;;
1449
+ # This must come before the *-dec entry.
1450
+ pdp10-*)
1451
+ os=-tops20
1452
+ ;;
1453
+ pdp11-*)
1454
+ os=-none
1455
+ ;;
1456
+ *-dec | vax-*)
1457
+ os=-ultrix4.2
1458
+ ;;
1459
+ m68*-apollo)
1460
+ os=-domain
1461
+ ;;
1462
+ i386-sun)
1463
+ os=-sunos4.0.2
1464
+ ;;
1465
+ m68000-sun)
1466
+ os=-sunos3
1467
+ # This also exists in the configure program, but was not the
1468
+ # default.
1469
+ # os=-sunos4
1470
+ ;;
1471
+ m68*-cisco)
1472
+ os=-aout
1473
+ ;;
1474
+ mep-*)
1475
+ os=-elf
1476
+ ;;
1477
+ mips*-cisco)
1478
+ os=-elf
1479
+ ;;
1480
+ mips*-*)
1481
+ os=-elf
1482
+ ;;
1483
+ or32-*)
1484
+ os=-coff
1485
+ ;;
1486
+ *-tti) # must be before sparc entry or we get the wrong os.
1487
+ os=-sysv3
1488
+ ;;
1489
+ sparc-* | *-sun)
1490
+ os=-sunos4.1.1
1491
+ ;;
1492
+ *-be)
1493
+ os=-beos
1494
+ ;;
1495
+ *-haiku)
1496
+ os=-haiku
1497
+ ;;
1498
+ *-ibm)
1499
+ os=-aix
1500
+ ;;
1501
+ *-knuth)
1502
+ os=-mmixware
1503
+ ;;
1504
+ *-wec)
1505
+ os=-proelf
1506
+ ;;
1507
+ *-winbond)
1508
+ os=-proelf
1509
+ ;;
1510
+ *-oki)
1511
+ os=-proelf
1512
+ ;;
1513
+ *-hp)
1514
+ os=-hpux
1515
+ ;;
1516
+ *-hitachi)
1517
+ os=-hiux
1518
+ ;;
1519
+ i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent)
1520
+ os=-sysv
1521
+ ;;
1522
+ *-cbm)
1523
+ os=-amigaos
1524
+ ;;
1525
+ *-dg)
1526
+ os=-dgux
1527
+ ;;
1528
+ *-dolphin)
1529
+ os=-sysv3
1530
+ ;;
1531
+ m68k-ccur)
1532
+ os=-rtu
1533
+ ;;
1534
+ m88k-omron*)
1535
+ os=-luna
1536
+ ;;
1537
+ *-next )
1538
+ os=-nextstep
1539
+ ;;
1540
+ *-sequent)
1541
+ os=-ptx
1542
+ ;;
1543
+ *-crds)
1544
+ os=-unos
1545
+ ;;
1546
+ *-ns)
1547
+ os=-genix
1548
+ ;;
1549
+ i370-*)
1550
+ os=-mvs
1551
+ ;;
1552
+ *-next)
1553
+ os=-nextstep3
1554
+ ;;
1555
+ *-gould)
1556
+ os=-sysv
1557
+ ;;
1558
+ *-highlevel)
1559
+ os=-bsd
1560
+ ;;
1561
+ *-encore)
1562
+ os=-bsd
1563
+ ;;
1564
+ *-sgi)
1565
+ os=-irix
1566
+ ;;
1567
+ *-siemens)
1568
+ os=-sysv4
1569
+ ;;
1570
+ *-masscomp)
1571
+ os=-rtu
1572
+ ;;
1573
+ f30[01]-fujitsu | f700-fujitsu)
1574
+ os=-uxpv
1575
+ ;;
1576
+ *-rom68k)
1577
+ os=-coff
1578
+ ;;
1579
+ *-*bug)
1580
+ os=-coff
1581
+ ;;
1582
+ *-apple)
1583
+ os=-macos
1584
+ ;;
1585
+ *-atari*)
1586
+ os=-mint
1587
+ ;;
1588
+ *)
1589
+ os=-none
1590
+ ;;
1591
+ esac
1592
+ fi
1593
+
1594
+ # Here we handle the case where we know the os, and the CPU type, but not the
1595
+ # manufacturer. We pick the logical manufacturer.
1596
+ vendor=unknown
1597
+ case $basic_machine in
1598
+ *-unknown)
1599
+ case $os in
1600
+ -riscix*)
1601
+ vendor=acorn
1602
+ ;;
1603
+ -sunos*)
1604
+ vendor=sun
1605
+ ;;
1606
+ -aix*)
1607
+ vendor=ibm
1608
+ ;;
1609
+ -beos*)
1610
+ vendor=be
1611
+ ;;
1612
+ -hpux*)
1613
+ vendor=hp
1614
+ ;;
1615
+ -mpeix*)
1616
+ vendor=hp
1617
+ ;;
1618
+ -hiux*)
1619
+ vendor=hitachi
1620
+ ;;
1621
+ -unos*)
1622
+ vendor=crds
1623
+ ;;
1624
+ -dgux*)
1625
+ vendor=dg
1626
+ ;;
1627
+ -luna*)
1628
+ vendor=omron
1629
+ ;;
1630
+ -genix*)
1631
+ vendor=ns
1632
+ ;;
1633
+ -mvs* | -opened*)
1634
+ vendor=ibm
1635
+ ;;
1636
+ -os400*)
1637
+ vendor=ibm
1638
+ ;;
1639
+ -ptx*)
1640
+ vendor=sequent
1641
+ ;;
1642
+ -tpf*)
1643
+ vendor=ibm
1644
+ ;;
1645
+ -vxsim* | -vxworks* | -windiss*)
1646
+ vendor=wrs
1647
+ ;;
1648
+ -aux*)
1649
+ vendor=apple
1650
+ ;;
1651
+ -hms*)
1652
+ vendor=hitachi
1653
+ ;;
1654
+ -mpw* | -macos*)
1655
+ vendor=apple
1656
+ ;;
1657
+ -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*)
1658
+ vendor=atari
1659
+ ;;
1660
+ -vos*)
1661
+ vendor=stratus
1662
+ ;;
1663
+ esac
1664
+ basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"`
1665
+ ;;
1666
+ esac
1667
+
1668
+ echo $basic_machine$os
1669
+ exit
1670
+
1671
+ # Local variables:
1672
+ # eval: (add-hook 'write-file-hooks 'time-stamp)
1673
+ # time-stamp-start: "timestamp='"
1674
+ # time-stamp-format: "%:y-%02m-%02d"
1675
+ # time-stamp-end: "'"
1676
+ # End:
mosesdecoder/contrib/lmserver/configure.ac ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ AC_PREREQ(2.52)
2
+ AC_INIT(lmserver, 1.0)
3
+ AC_CANONICAL_SYSTEM
4
+ AC_CONFIG_SRCDIR(lmserver.c)
5
+ AM_INIT_AUTOMAKE(AC_PACKAGE_NAME, AC_PACKAGE_VERSION)
6
+ AM_CONFIG_HEADER(config.h)
7
+
8
+ AC_PROG_CC
9
+ AC_PROG_CXX
10
+ AM_PROG_CC_C_O
11
+ AC_PROG_INSTALL
12
+
13
+ dnl --with-srilm=PATH: record the SRILM root in $with_srilm ("no" when the option is absent).
+ AC_ARG_WITH(srilm,
14
+ [AS_HELP_STRING([--with-srilm=PATH], [(optional) path to SRI's LM toolkit])],
15
+ [with_srilm=$withval],
16
+ [with_srilm=no]
17
+ )
18
+
19
+ dnl --enable-64bit: build and run a probe with -m64 and require 8-byte pointers;
+ dnl on success -m64 is prepended to the caller's CFLAGS, otherwise configure aborts.
+ dnl The comparison uses "=" because "==" is not a POSIX test(1) operator.
+ AC_ARG_ENABLE(64bit,
20
+ [AS_HELP_STRING([--enable-64bit],[build 64bit version])])
21
+ if test "x$enable_64bit" = "xyes"
22
+ then
23
+ org_cflags=$CFLAGS
24
+ CFLAGS=-m64
25
+ AC_RUN_IFELSE(
26
+ [AC_LANG_PROGRAM([], [dnl
27
+ return sizeof(void*) == 8 ? 0 : 1;
28
+ ])
29
+ ],[
30
+ CFLAGS="-m64 $org_cflags"
31
+ ],[
32
+ AC_MSG_ERROR([Don't know how to build a 64-bit object.])
33
+ ])
34
+ fi
35
+
36
+ trylibeventdir=""
37
+ AC_ARG_WITH(libevent,
38
+ [ --with-libevent=PATH Specify path to libevent installation ],
39
+ [
40
+ if test "x$withval" != "xno" ; then
41
+ trylibeventdir=$withval
42
+ fi
43
+ ]
44
+ )
45
+
46
+ dnl When an SRILM root was given: add its include directory, require Prob.h,
+ dnl and link against the oolm/dstruct/misc libraries from the per-architecture
+ dnl lib directory reported by SRILM's sbin/machine-type script.
+ if test "x$with_srilm" != 'xno'
47
+ then
48
+ SAVE_CPPFLAGS="$CPPFLAGS"
49
+ CPPFLAGS="$CPPFLAGS -I${with_srilm}/include"
50
+
51
+ AC_CHECK_HEADER(Prob.h,
52
+ [AC_DEFINE([HAVE_SRILM], [], [flag for SRILM])],
53
+ [AC_MSG_ERROR([Cannot find SRILM!])])
54
+
55
+ LIB_SRILM="-loolm -ldstruct -lmisc"
56
+ # ROOT/lib/i686-m64/liboolm.a
57
+ # ROOT/lib/i686-m64/libdstruct.a
58
+ # ROOT/lib/i686-m64/libmisc.a
59
+ MY_ARCH=`${with_srilm}/sbin/machine-type`
60
+ LDFLAGS="$LDFLAGS -L${with_srilm}/lib/${MY_ARCH}"
61
+ LIBS="$LIBS $LIB_SRILM"
62
+ FMTLIBS="$FMTLIBS liboolm.a libdstruct.a libmisc.a"
63
+ fi
+ dnl AM_CONDITIONAL must run on every configure invocation, so it lives outside
+ dnl the shell "if"; otherwise a build without SRILM never defines SRI_LM.
64
+ AM_CONDITIONAL([SRI_LM], [test "x$with_srilm" != "xno"])
65
+
66
+
67
+ dnl ------------------------------------------------------
68
+ dnl libevent detection. swiped from Tor. modified a bit.
69
+
70
+ LIBEVENT_URL=http://www.monkey.org/~provos/libevent/
71
+
72
+ dnl Try each candidate prefix in turn (user-supplied directory, the default
+ dnl search path, $prefix, /usr/local) and cache the first one with which a
+ dnl program calling event_init() compiles and links. Aborts if none works.
+ AC_CACHE_CHECK([for libevent directory], ac_cv_libevent_dir, [
73
+ saved_LIBS="$LIBS"
74
+ saved_LDFLAGS="$LDFLAGS"
75
+ saved_CPPFLAGS="$CPPFLAGS"
76
+ le_found=no
77
+ for ledir in $trylibeventdir "" $prefix /usr/local ; do
+ # Start every candidate from the saved flags so that a rejected
+ # directory leaves no -L/-I behind for the next probe.
78
+ LDFLAGS="$saved_LDFLAGS"
79
+ LIBS="$saved_LIBS -levent"
80
+ CPPFLAGS="$saved_CPPFLAGS"
81
+ # Skip the directory if it isn't there.
82
+ if test ! -z "$ledir" -a ! -d "$ledir" ; then
83
+ continue;
84
+ fi
85
+ if test ! -z "$ledir" ; then
86
+ if test -d "$ledir/lib" ; then
87
+ LDFLAGS="-L$ledir/lib $LDFLAGS"
88
+ else
89
+ LDFLAGS="-L$ledir $LDFLAGS"
90
+ fi
91
+ if test -d "$ledir/include" ; then
92
+ CPPFLAGS="-I$ledir/include $CPPFLAGS"
93
+ else
94
+ CPPFLAGS="-I$ledir $CPPFLAGS"
95
+ fi
96
+ fi
97
+ # Can I compile and link it?
98
+ AC_TRY_LINK([#include <sys/time.h>
99
+ #include <sys/types.h>
100
+ #include <event.h>], [ event_init(); ],
101
+ [ libevent_linked=yes ], [ libevent_linked=no ])
102
+ if test "$libevent_linked" = yes; then
103
+ if test ! -z "$ledir" ; then
104
+ ac_cv_libevent_dir=$ledir
105
+ else
106
+ ac_cv_libevent_dir="(system)"
107
+ fi
108
+ le_found=yes
109
+ break
110
+ fi
111
+ done
112
+ LIBS="$saved_LIBS"
113
+ LDFLAGS="$saved_LDFLAGS"
114
+ CPPFLAGS="$saved_CPPFLAGS"
115
+ if test "$le_found" = no ; then
116
+ AC_MSG_ERROR([libevent is required. You can get it from $LIBEVENT_URL
117
+
118
+ If it's already installed, specify its path using --with-libevent=/dir/
119
+ ])
120
+ fi
121
+ ])
122
+ dnl Link against libevent; when it was found outside the default search path,
+ dnl prepend the matching -L and -I flags (lib/ and include/ subdirectories are
+ dnl preferred when they exist). The value is quoted and x-prefixed so that a
+ dnl path containing whitespace, or the literal "(system)", is a single operand.
+ LIBS="$LIBS -levent"
123
+ if test "x$ac_cv_libevent_dir" != "x(system)"; then
124
+ if test -d "$ac_cv_libevent_dir/lib" ; then
125
+ LDFLAGS="-L$ac_cv_libevent_dir/lib $LDFLAGS"
126
+ le_libdir="$ac_cv_libevent_dir/lib"
127
+ else
128
+ LDFLAGS="-L$ac_cv_libevent_dir $LDFLAGS"
129
+ le_libdir="$ac_cv_libevent_dir"
130
+ fi
131
+ if test -d "$ac_cv_libevent_dir/include" ; then
132
+ CPPFLAGS="-I$ac_cv_libevent_dir/include $CPPFLAGS"
133
+ else
134
+ CPPFLAGS="-I$ac_cv_libevent_dir $CPPFLAGS"
135
+ fi
136
+ fi
137
+
138
+ dnl ----------------------------------------------------------------------------
139
+
140
+ AC_SEARCH_LIBS(socket, socket)
141
+ AC_SEARCH_LIBS(gethostbyname, nsl)
142
+ AC_SEARCH_LIBS(mallinfo, malloc)
143
+
144
+ AC_CHECK_FUNC(daemon,AC_DEFINE([HAVE_DAEMON],,[Define this if you have daemon()]),[DAEMON_OBJ=daemon.o])
145
+ AC_SUBST(DAEMON_OBJ)
146
+
147
+ AC_HEADER_STDBOOL
148
+ AC_C_CONST
149
+ AC_CHECK_HEADER(malloc.h, AC_DEFINE(HAVE_MALLOC_H,,[do we have malloc.h?]))
150
+ dnl Define HAVE_STRUCT_MALLINFO when <malloc.h> declares struct mallinfo with an arena member.
+ AC_CHECK_MEMBER([struct mallinfo.arena], [
151
+ AC_DEFINE(HAVE_STRUCT_MALLINFO,,[do we have struct mallinfo?])
152
+ ], ,[
153
+ # include <malloc.h>
154
+ ]
155
+ )
156
+
157
+ dnl From licq: Copyright (c) 2000 Dirk Mueller
158
+ dnl Check if the type socklen_t is defined anywhere
+ dnl The probe compiles a declaration of a socklen_t variable after including
+ dnl <sys/types.h> and <sys/socket.h>; the result is cached in ac_cv_c_socklen_t.
+ dnl When the type is missing, socklen_t is defined as int in the config header.
159
+ AC_DEFUN([AC_C_SOCKLEN_T],
160
+ [AC_CACHE_CHECK(for socklen_t, ac_cv_c_socklen_t,
161
+ [
162
+ AC_TRY_COMPILE([
163
+ #include <sys/types.h>
164
+ #include <sys/socket.h>
165
+ ],[
166
+ socklen_t foo;
167
+ ],[
168
+ ac_cv_c_socklen_t=yes
169
+ ],[
170
+ ac_cv_c_socklen_t=no
171
+ ])
172
+ ])
173
+ if test $ac_cv_c_socklen_t = no; then
174
+ AC_DEFINE(socklen_t, int, [define to int if socklen_t not available])
175
+ fi
176
+ ])
177
+
178
+ AC_C_SOCKLEN_T
179
+
180
+ dnl Check if we're a little-endian or a big-endian system, needed by hash code
+ dnl The probe stores 1 in a long and inspects its first byte: a non-zero exit
+ dnl status (first byte is 1) means little-endian, a zero status means big-endian.
+ dnl The status is handed back with "return" rather than exit(), because the
+ dnl probe includes no header declaring exit(); a compiler that rejects implicit
+ dnl declarations would fail the build and every host would be reported as little.
181
+ AC_DEFUN([AC_C_ENDIAN],
182
+ [AC_CACHE_CHECK(for endianness, ac_cv_c_endian,
183
+ [
184
+ AC_RUN_IFELSE(
185
+ [AC_LANG_PROGRAM([], [dnl
186
+ long val = 1;
187
+ char *c = (char *) &val;
188
+ return *c == 1;
189
+ ])
190
+ ],[
191
+ ac_cv_c_endian=big
192
+ ],[
193
+ ac_cv_c_endian=little
194
+ ])
195
+ ])
196
+ if test $ac_cv_c_endian = big; then
197
+ AC_DEFINE(ENDIAN_BIG, 1, [machine is bigendian])
198
+ fi
199
+ if test $ac_cv_c_endian = little; then
200
+ AC_DEFINE(ENDIAN_LITTLE, 1, [machine is littleendian])
201
+ fi
202
+ ])
203
+
204
+ AC_C_ENDIAN
205
+
206
+ dnl Check whether the user wants threads or not (--enable-threads). A library providing
+ dnl pthread_create is required; "=" is used because "==" is not a POSIX test(1) operator.
207
+ AC_ARG_ENABLE(threads,
208
+ [AS_HELP_STRING([--enable-threads],[support multithreaded execution])])
209
+ if test "x$enable_threads" = "xyes"; then
210
+ AC_SEARCH_LIBS(pthread_create, pthread)
211
+ if test "x$ac_cv_search_pthread_create" != "xno"; then
212
+ AC_DEFINE([USE_THREADS],,[Define this if you want to use pthreads])
213
+ dnl Sun compilers need the -mt flag!
214
+ AC_RUN_IFELSE(
215
+ [AC_LANG_PROGRAM([], [dnl
216
+ #ifdef __SUNPRO_C
217
+ return 0;
218
+ #else
219
+ return 1;
220
+ #endif
221
+ ])
222
+ ],[
223
+ CFLAGS="-mt $CFLAGS"
224
+ ])
225
+ else
226
+ AC_MSG_ERROR([Can't enable threads without the POSIX thread library.])
227
+ fi
228
+ fi
229
+
230
+ AC_CHECK_FUNCS(mlockall)
231
+ AC_CHECK_FUNCS(getpagesizes)
232
+ AC_CHECK_FUNCS(memcntl)
233
+
234
+ AC_CONFIG_FILES(Makefile)
235
+ AC_OUTPUT
mosesdecoder/contrib/lmserver/depcomp ADDED
@@ -0,0 +1,589 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #! /bin/sh
2
+ # depcomp - compile a program generating dependencies as side-effects
3
+
4
+ scriptversion=2007-03-29.01
5
+
6
+ # Copyright (C) 1999, 2000, 2003, 2004, 2005, 2006, 2007 Free Software
7
+ # Foundation, Inc.
8
+
9
+ # This program is free software; you can redistribute it and/or modify
10
+ # it under the terms of the GNU General Public License as published by
11
+ # the Free Software Foundation; either version 2, or (at your option)
12
+ # any later version.
13
+
14
+ # This program is distributed in the hope that it will be useful,
15
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
16
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
+ # GNU General Public License for more details.
18
+
19
+ # You should have received a copy of the GNU General Public License
20
+ # along with this program; if not, write to the Free Software
21
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
+ # 02110-1301, USA.
23
+
24
+ # As a special exception to the GNU General Public License, if you
25
+ # distribute this file as part of a program that contains a
26
+ # configuration script generated by Autoconf, you may include it under
27
+ # the same distribution terms that you use for the rest of that program.
28
+
29
+ # Originally written by Alexandre Oliva <oliva@dcc.unicamp.br>.
30
+
31
+ case $1 in
32
+ '')
33
+ echo "$0: No command. Try \`$0 --help' for more information." 1>&2
34
+ exit 1;
35
+ ;;
36
+ -h | --h*)
37
+ cat <<\EOF
38
+ Usage: depcomp [--help] [--version] PROGRAM [ARGS]
39
+
40
+ Run PROGRAMS ARGS to compile a file, generating dependencies
41
+ as side-effects.
42
+
43
+ Environment variables:
44
+ depmode Dependency tracking mode.
45
+ source Source file read by `PROGRAMS ARGS'.
46
+ object Object file output by `PROGRAMS ARGS'.
47
+ DEPDIR directory where to store dependencies.
48
+ depfile Dependency file to output.
49
+ tmpdepfile Temporary file to use when outputing dependencies.
50
+ libtool Whether libtool is used (yes/no).
51
+
52
+ Report bugs to <bug-automake@gnu.org>.
53
+ EOF
54
+ exit $?
55
+ ;;
56
+ -v | --v*)
57
+ echo "depcomp $scriptversion"
58
+ exit $?
59
+ ;;
60
+ esac
61
+
62
+ if test -z "$depmode" || test -z "$source" || test -z "$object"; then
63
+ echo "depcomp: Variables source, object and depmode must be set" 1>&2
64
+ exit 1
65
+ fi
66
+
67
+ # Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po.
68
+ depfile=${depfile-`echo "$object" |
69
+ sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`}
70
+ tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`}
71
+
72
+ rm -f "$tmpdepfile"
73
+
74
+ # Some modes work just like other modes, but use different flags. We
75
+ # parameterize here, but still list the modes in the big case below,
76
+ # to make depend.m4 easier to write. Note that we *cannot* use a case
77
+ # here, because this file can only contain one case statement.
78
+ if test "$depmode" = hp; then
79
+ # HP compiler uses -M and no extra arg.
80
+ gccflag=-M
81
+ depmode=gcc
82
+ fi
83
+
84
+ if test "$depmode" = dashXmstdout; then
85
+ # This is just like dashmstdout with a different argument.
86
+ dashmflag=-xM
87
+ depmode=dashmstdout
88
+ fi
89
+
90
+ case "$depmode" in
91
+ gcc3)
92
+ ## gcc 3 implements dependency tracking that does exactly what
93
+ ## we want. Yay! Note: for some reason libtool 1.4 doesn't like
94
+ ## it if -MD -MP comes after the -MF stuff. Hmm.
95
+ ## Unfortunately, FreeBSD c89 acceptance of flags depends upon
96
+ ## the command line argument order; so add the flags where they
97
+ ## appear in depend2.am. Note that the slowdown incurred here
98
+ ## affects only configure: in makefiles, %FASTDEP% shortcuts this.
99
+ for arg
100
+ do
101
+ case $arg in
102
+ -c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;;
103
+ *) set fnord "$@" "$arg" ;;
104
+ esac
105
+ shift # fnord
106
+ shift # $arg
107
+ done
108
+ "$@"
109
+ stat=$?
110
+ if test $stat -eq 0; then :
111
+ else
112
+ rm -f "$tmpdepfile"
113
+ exit $stat
114
+ fi
115
+ mv "$tmpdepfile" "$depfile"
116
+ ;;
117
+
118
+ gcc)
119
+ ## There are various ways to get dependency output from gcc. Here's
120
+ ## why we pick this rather obscure method:
121
+ ## - Don't want to use -MD because we'd like the dependencies to end
122
+ ## up in a subdir. Having to rename by hand is ugly.
123
+ ## (We might end up doing this anyway to support other compilers.)
124
+ ## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like
125
+ ## -MM, not -M (despite what the docs say).
126
+ ## - Using -M directly means running the compiler twice (even worse
127
+ ## than renaming).
128
+ if test -z "$gccflag"; then
129
+ gccflag=-MD,
130
+ fi
131
+ "$@" -Wp,"$gccflag$tmpdepfile"
132
+ stat=$?
133
+ if test $stat -eq 0; then :
134
+ else
135
+ rm -f "$tmpdepfile"
136
+ exit $stat
137
+ fi
138
+ rm -f "$depfile"
139
+ echo "$object : \\" > "$depfile"
140
+ alpha=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
141
+ ## The second -e expression handles DOS-style file names with drive letters.
142
+ sed -e 's/^[^:]*: / /' \
143
+ -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile"
144
+ ## This next piece of magic avoids the `deleted header file' problem.
145
+ ## The problem is that when a header file which appears in a .P file
146
+ ## is deleted, the dependency causes make to die (because there is
147
+ ## typically no way to rebuild the header). We avoid this by adding
148
+ ## dummy dependencies for each header file. Too bad gcc doesn't do
149
+ ## this for us directly.
150
+ tr ' ' '
151
+ ' < "$tmpdepfile" |
152
+ ## Some versions of gcc put a space before the `:'. On the theory
153
+ ## that the space means something, we add a space to the output as
154
+ ## well.
155
+ ## Some versions of the HPUX 10.20 sed can't process this invocation
156
+ ## correctly. Breaking it into two sed invocations is a workaround.
157
+ sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
158
+ rm -f "$tmpdepfile"
159
+ ;;
160
+
161
+ hp)
162
+ # This case exists only to let depend.m4 do its work. It works by
163
+ # looking at the text of this script. This case will never be run,
164
+ # since it is checked for above.
165
+ exit 1
166
+ ;;
167
+
168
+ sgi)
169
+ if test "$libtool" = yes; then
170
+ "$@" "-Wp,-MDupdate,$tmpdepfile"
171
+ else
172
+ "$@" -MDupdate "$tmpdepfile"
173
+ fi
174
+ stat=$?
175
+ if test $stat -eq 0; then :
176
+ else
177
+ rm -f "$tmpdepfile"
178
+ exit $stat
179
+ fi
180
+ rm -f "$depfile"
181
+
182
+ if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files
183
+ echo "$object : \\" > "$depfile"
184
+
185
+ # Clip off the initial element (the dependent). Don't try to be
186
+ # clever and replace this with sed code, as IRIX sed won't handle
187
+ # lines with more than a fixed number of characters (4096 in
188
+ # IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines;
189
+ # the IRIX cc adds comments like `#:fec' to the end of the
190
+ # dependency line.
191
+ tr ' ' '
192
+ ' < "$tmpdepfile" \
193
+ | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' | \
194
+ tr '
195
+ ' ' ' >> $depfile
196
+ echo >> $depfile
197
+
198
+ # The second pass generates a dummy entry for each header file.
199
+ tr ' ' '
200
+ ' < "$tmpdepfile" \
201
+ | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \
202
+ >> $depfile
203
+ else
204
+ # The sourcefile does not contain any dependencies, so just
205
+ # store a dummy comment line, to avoid errors with the Makefile
206
+ # "include basename.Plo" scheme.
207
+ echo "#dummy" > "$depfile"
208
+ fi
209
+ rm -f "$tmpdepfile"
210
+ ;;
211
+
212
+ aix)
213
+ # The C for AIX Compiler uses -M and outputs the dependencies
214
+ # in a .u file. In older versions, this file always lives in the
215
+ # current directory. Also, the AIX compiler puts `$object:' at the
216
+ # start of each line; $object doesn't have directory information.
217
+ # Version 6 uses the directory in both cases.
218
+ dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
219
+ test "x$dir" = "x$object" && dir=
220
+ base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'`
221
+ if test "$libtool" = yes; then
222
+ tmpdepfile1=$dir$base.u
223
+ tmpdepfile2=$base.u
224
+ tmpdepfile3=$dir.libs/$base.u
225
+ "$@" -Wc,-M
226
+ else
227
+ tmpdepfile1=$dir$base.u
228
+ tmpdepfile2=$dir$base.u
229
+ tmpdepfile3=$dir$base.u
230
+ "$@" -M
231
+ fi
232
+ stat=$?
233
+
234
+ if test $stat -eq 0; then :
235
+ else
236
+ rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
237
+ exit $stat
238
+ fi
239
+
240
+ for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
241
+ do
242
+ test -f "$tmpdepfile" && break
243
+ done
244
+ if test -f "$tmpdepfile"; then
245
+ # Each line is of the form `foo.o: dependent.h'.
246
+ # Do two passes, one to just change these to
247
+ # `$object: dependent.h' and one to simply `dependent.h:'.
248
+ sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile"
249
+ # That's a tab and a space in the [].
250
+ sed -e 's,^.*\.[a-z]*:[ ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
251
+ else
252
+ # The sourcefile does not contain any dependencies, so just
253
+ # store a dummy comment line, to avoid errors with the Makefile
254
+ # "include basename.Plo" scheme.
255
+ echo "#dummy" > "$depfile"
256
+ fi
257
+ rm -f "$tmpdepfile"
258
+ ;;
259
+
260
+ icc)
261
+ # Intel's C compiler understands `-MD -MF file'. However on
262
+ # icc -MD -MF foo.d -c -o sub/foo.o sub/foo.c
263
+ # ICC 7.0 will fill foo.d with something like
264
+ # foo.o: sub/foo.c
265
+ # foo.o: sub/foo.h
266
+ # which is wrong. We want:
267
+ # sub/foo.o: sub/foo.c
268
+ # sub/foo.o: sub/foo.h
269
+ # sub/foo.c:
270
+ # sub/foo.h:
271
+ # ICC 7.1 will output
272
+ # foo.o: sub/foo.c sub/foo.h
273
+ # and will wrap long lines using \ :
274
+ # foo.o: sub/foo.c ... \
275
+ # sub/foo.h ... \
276
+ # ...
277
+
278
+ "$@" -MD -MF "$tmpdepfile"
279
+ stat=$?
280
+ if test $stat -eq 0; then :
281
+ else
282
+ rm -f "$tmpdepfile"
283
+ exit $stat
284
+ fi
285
+ rm -f "$depfile"
286
+ # Each line is of the form `foo.o: dependent.h',
287
+ # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'.
288
+ # Do two passes, one to just change these to
289
+ # `$object: dependent.h' and one to simply `dependent.h:'.
290
+ sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile"
291
+ # Some versions of the HPUX 10.20 sed can't process this invocation
292
+ # correctly. Breaking it into two sed invocations is a workaround.
293
+ sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" |
294
+ sed -e 's/$/ :/' >> "$depfile"
295
+ rm -f "$tmpdepfile"
296
+ ;;
297
+
298
+ hp2)
299
+ # The "hp" stanza above does not work with aCC (C++) and HP's ia64
300
+ # compilers, which have integrated preprocessors. The correct option
301
+ # to use with these is +Maked; it writes dependencies to a file named
302
+ # 'foo.d', which lands next to the object file, wherever that
303
+ # happens to be.
304
+ # Much of this is similar to the tru64 case; see comments there.
305
+ dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
306
+ test "x$dir" = "x$object" && dir=
307
+ base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'`
308
+ if test "$libtool" = yes; then
309
+ tmpdepfile1=$dir$base.d
310
+ tmpdepfile2=$dir.libs/$base.d
311
+ "$@" -Wc,+Maked
312
+ else
313
+ tmpdepfile1=$dir$base.d
314
+ tmpdepfile2=$dir$base.d
315
+ "$@" +Maked
316
+ fi
317
+ stat=$?
318
+ if test $stat -eq 0; then :
319
+ else
320
+ rm -f "$tmpdepfile1" "$tmpdepfile2"
321
+ exit $stat
322
+ fi
323
+
324
+ for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2"
325
+ do
326
+ test -f "$tmpdepfile" && break
327
+ done
328
+ if test -f "$tmpdepfile"; then
329
+ sed -e "s,^.*\.[a-z]*:,$object:," "$tmpdepfile" > "$depfile"
330
+ # Add `dependent.h:' lines.
331
+ sed -ne '2,${; s/^ *//; s/ \\*$//; s/$/:/; p;}' "$tmpdepfile" >> "$depfile"
332
+ else
333
+ echo "#dummy" > "$depfile"
334
+ fi
335
+ rm -f "$tmpdepfile" "$tmpdepfile2"
336
+ ;;
337
+
338
+ tru64)
339
+ # The Tru64 compiler uses -MD to generate dependencies as a side
340
+ # effect. `cc -MD -o foo.o ...' puts the dependencies into `foo.o.d'.
341
+ # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put
342
+ # dependencies in `foo.d' instead, so we check for that too.
343
+ # Subdirectories are respected.
344
+ dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
345
+ test "x$dir" = "x$object" && dir=
346
+ base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'`
347
+
348
+ if test "$libtool" = yes; then
349
+ # With Tru64 cc, shared objects can also be used to make a
350
+ # static library. This mechanism is used in libtool 1.4 series to
351
+ # handle both shared and static libraries in a single compilation.
352
+ # With libtool 1.4, dependencies were output in $dir.libs/$base.lo.d.
353
+ #
354
+ # With libtool 1.5 this exception was removed, and libtool now
355
+ # generates 2 separate objects for the 2 libraries. These two
356
+ # compilations output dependencies in $dir.libs/$base.o.d and
357
+ # in $dir$base.o.d. We have to check for both files, because
358
+ # one of the two compilations can be disabled. We should prefer
359
+ # $dir$base.o.d over $dir.libs/$base.o.d because the latter is
360
+ # automatically cleaned when .libs/ is deleted, while ignoring
361
+ # the former would cause a distcleancheck panic.
362
+ tmpdepfile1=$dir.libs/$base.lo.d # libtool 1.4
363
+ tmpdepfile2=$dir$base.o.d # libtool 1.5
364
+ tmpdepfile3=$dir.libs/$base.o.d # libtool 1.5
365
+ tmpdepfile4=$dir.libs/$base.d # Compaq CCC V6.2-504
366
+ "$@" -Wc,-MD
367
+ else
368
+ tmpdepfile1=$dir$base.o.d
369
+ tmpdepfile2=$dir$base.d
370
+ tmpdepfile3=$dir$base.d
371
+ tmpdepfile4=$dir$base.d
372
+ "$@" -MD
373
+ fi
374
+
375
+ stat=$?
376
+ if test $stat -eq 0; then :
377
+ else
378
+ rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" "$tmpdepfile4"
379
+ exit $stat
380
+ fi
381
+
382
+ for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" "$tmpdepfile4"
383
+ do
384
+ test -f "$tmpdepfile" && break
385
+ done
386
+ if test -f "$tmpdepfile"; then
387
+ sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile"
388
+ # That's a tab and a space in the [].
389
+ sed -e 's,^.*\.[a-z]*:[ ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
390
+ else
391
+ echo "#dummy" > "$depfile"
392
+ fi
393
+ rm -f "$tmpdepfile"
394
+ ;;
395
+
396
+ #nosideeffect)
397
+ # This comment above is used by automake to tell side-effect
398
+ # dependency tracking mechanisms from slower ones.
399
+
400
+ dashmstdout)
401
+ # Important note: in order to support this mode, a compiler *must*
402
+ # always write the preprocessed file to stdout, regardless of -o.
403
+ "$@" || exit $?
404
+
405
+ # Remove the call to Libtool.
406
+ if test "$libtool" = yes; then
407
+ while test $1 != '--mode=compile'; do
408
+ shift
409
+ done
410
+ shift
411
+ fi
412
+
413
+ # Remove `-o $object'.
414
+ IFS=" "
415
+ for arg
416
+ do
417
+ case $arg in
418
+ -o)
419
+ shift
420
+ ;;
421
+ $object)
422
+ shift
423
+ ;;
424
+ *)
425
+ set fnord "$@" "$arg"
426
+ shift # fnord
427
+ shift # $arg
428
+ ;;
429
+ esac
430
+ done
431
+
432
+ test -z "$dashmflag" && dashmflag=-M
433
+ # Require at least two characters before searching for `:'
434
+ # in the target name. This is to cope with DOS-style filenames:
435
+ # a dependency such as `c:/foo/bar' could be seen as target `c' otherwise.
436
+ "$@" $dashmflag |
437
+ sed 's:^[ ]*[^: ][^:][^:]*\:[ ]*:'"$object"'\: :' > "$tmpdepfile"
438
+ rm -f "$depfile"
439
+ cat < "$tmpdepfile" > "$depfile"
440
+ tr ' ' '
441
+ ' < "$tmpdepfile" | \
442
+ ## Some versions of the HPUX 10.20 sed can't process this invocation
443
+ ## correctly. Breaking it into two sed invocations is a workaround.
444
+ sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
445
+ rm -f "$tmpdepfile"
446
+ ;;
447
+
448
+ dashXmstdout)
449
+ # This case only exists to satisfy depend.m4. It is never actually
450
+ # run, as this mode is specially recognized in the preamble.
451
+ exit 1
452
+ ;;
453
+
454
+ makedepend)
455
+ "$@" || exit $?
456
+ # Remove any Libtool call
457
+ if test "$libtool" = yes; then
458
+ while test $1 != '--mode=compile'; do
459
+ shift
460
+ done
461
+ shift
462
+ fi
463
+ # X makedepend
464
+ shift
465
+ cleared=no
466
+ for arg in "$@"; do
467
+ case $cleared in
468
+ no)
469
+ set ""; shift
470
+ cleared=yes ;;
471
+ esac
472
+ case "$arg" in
473
+ -D*|-I*)
474
+ set fnord "$@" "$arg"; shift ;;
475
+ # Strip any option that makedepend may not understand. Remove
476
+ # the object too, otherwise makedepend will parse it as a source file.
477
+ -*|$object)
478
+ ;;
479
+ *)
480
+ set fnord "$@" "$arg"; shift ;;
481
+ esac
482
+ done
483
+ obj_suffix="`echo $object | sed 's/^.*\././'`"
484
+ touch "$tmpdepfile"
485
+ ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@"
486
+ rm -f "$depfile"
487
+ cat < "$tmpdepfile" > "$depfile"
488
+ sed '1,2d' "$tmpdepfile" | tr ' ' '
489
+ ' | \
490
+ ## Some versions of the HPUX 10.20 sed can't process this invocation
491
+ ## correctly. Breaking it into two sed invocations is a workaround.
492
+ sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
493
+ rm -f "$tmpdepfile" "$tmpdepfile".bak
494
+ ;;
495
+
496
+ cpp)
497
+ # Important note: in order to support this mode, a compiler *must*
498
+ # always write the preprocessed file to stdout.
499
+ "$@" || exit $?
500
+
501
+ # Remove the call to Libtool.
502
+ if test "$libtool" = yes; then
503
+ while test $1 != '--mode=compile'; do
504
+ shift
505
+ done
506
+ shift
507
+ fi
508
+
509
+ # Remove `-o $object'.
510
+ IFS=" "
511
+ for arg
512
+ do
513
+ case $arg in
514
+ -o)
515
+ shift
516
+ ;;
517
+ $object)
518
+ shift
519
+ ;;
520
+ *)
521
+ set fnord "$@" "$arg"
522
+ shift # fnord
523
+ shift # $arg
524
+ ;;
525
+ esac
526
+ done
527
+
528
+ "$@" -E |
529
+ sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
530
+ -e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' |
531
+ sed '$ s: \\$::' > "$tmpdepfile"
532
+ rm -f "$depfile"
533
+ echo "$object : \\" > "$depfile"
534
+ cat < "$tmpdepfile" >> "$depfile"
535
+ sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile"
536
+ rm -f "$tmpdepfile"
537
+ ;;
538
+
539
+ msvisualcpp)
540
+ # Important note: in order to support this mode, a compiler *must*
541
+ # always write the preprocessed file to stdout, regardless of -o,
542
+ # because we must use -o when running libtool.
543
+ "$@" || exit $?
544
+ IFS=" "
545
+ for arg
546
+ do
547
+ case "$arg" in
548
+ "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI")
549
+ set fnord "$@"
550
+ shift
551
+ shift
552
+ ;;
553
+ *)
554
+ set fnord "$@" "$arg"
555
+ shift
556
+ shift
557
+ ;;
558
+ esac
559
+ done
560
+ "$@" -E |
561
+ sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::echo "`cygpath -u \\"\1\\"`":p' | sort | uniq > "$tmpdepfile"
562
+ rm -f "$depfile"
563
+ echo "$object : \\" > "$depfile"
564
+ . "$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s:: \1 \\:p' >> "$depfile"
565
+ echo " " >> "$depfile"
566
+ . "$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s::\1\::p' >> "$depfile"
567
+ rm -f "$tmpdepfile"
568
+ ;;
569
+
570
+ none)
571
+ exec "$@"
572
+ ;;
573
+
574
+ *)
575
+ echo "Unknown depmode $depmode" 1>&2
576
+ exit 1
577
+ ;;
578
+ esac
579
+
580
+ exit 0
581
+
582
+ # Local Variables:
583
+ # mode: shell-script
584
+ # sh-indentation: 2
585
+ # eval: (add-hook 'write-file-hooks 'time-stamp)
586
+ # time-stamp-start: "scriptversion="
587
+ # time-stamp-format: "%:y-%02m-%02d.%02H"
588
+ # time-stamp-end: "$"
589
+ # End:
mosesdecoder/contrib/lmserver/lmserver.h ADDED
@@ -0,0 +1,375 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2
+ /* $Id$ */
3
+ #ifndef lmserver_lmserver_h
4
+ #define lmserver_lmserver_h
5
+
6
+ #ifdef HAVE_CONFIG_H
7
+ #include "config.h"
8
+ #endif
9
+
10
+ #include <sys/types.h>
11
+ #include <sys/socket.h>
12
+ #include <sys/time.h>
13
+ #include <netinet/in.h>
14
+ #include <event.h>
15
+ #include <netdb.h>
16
+
17
+ #define DATA_BUFFER_SIZE 2048
18
+ #define UDP_READ_BUFFER_SIZE 65536
19
+ #define UDP_MAX_PAYLOAD_SIZE 1400
20
+ #define UDP_HEADER_SIZE 8
21
+ #define MAX_SENDBUF_SIZE (256 * 1024 * 1024)
22
+ /* I'm told the max length of a 64-bit num converted to string is 20 bytes.
23
+ * Plus a few for spaces, \r\n, \0 */
24
+ #define SUFFIX_SIZE 24
25
+
26
+ /** Initial size of list of items being returned by "get". */
27
+ #define ITEM_LIST_INITIAL 200
28
+
29
+ /** Initial size of list of CAS suffixes appended to "gets" lines. */
30
+ #define SUFFIX_LIST_INITIAL 20
31
+
32
+ /** Initial size of the sendmsg() scatter/gather array. */
33
+ #define IOV_LIST_INITIAL 400
34
+
35
+ /** Initial number of sendmsg() argument structures to allocate. */
36
+ #define MSG_LIST_INITIAL 10
37
+
38
+ /** High water marks for buffer shrinking */
39
+ #define READ_BUFFER_HIGHWAT 8192
40
+ #define ITEM_LIST_HIGHWAT 400
41
+ #define IOV_LIST_HIGHWAT 600
42
+ #define MSG_LIST_HIGHWAT 100
43
+
44
+ /* Get a consistent bool type */
45
+ #if HAVE_STDBOOL_H
46
+ # include <stdbool.h>
47
+ #else
48
+ typedef enum {false = 0, true = 1} bool;
49
+ #endif
50
+
51
+ #if HAVE_STDINT_H
52
+ # include <stdint.h>
53
+ #else
54
+ typedef unsigned char uint8_t;
55
+ #endif
56
+
57
+ /* unistd.h is here */
58
+ #if HAVE_UNISTD_H
59
+ # include <unistd.h>
60
+ #endif
61
+
62
+ /** Time relative to server start. Smaller than time_t on 64-bit systems. */
63
+ typedef unsigned int rel_time_t;
64
+
65
+ struct stats {
66
+ unsigned int curr_items;
67
+ unsigned int total_items;
68
+ uint64_t curr_bytes;
69
+ unsigned int curr_conns;
70
+ unsigned int total_conns;
71
+ unsigned int conn_structs;
72
+ uint64_t get_cmds;
73
+ uint64_t set_cmds;
74
+ uint64_t get_hits;
75
+ uint64_t get_misses;
76
+ uint64_t evictions;
77
+ time_t started; /* when the process was started */
78
+ uint64_t bytes_read;
79
+ uint64_t bytes_written;
80
+ };
81
+
82
+ #define MAX_VERBOSITY_LEVEL 2
83
+
84
+ struct settings {
85
+ size_t maxbytes;
86
+ int maxconns;
87
+ int port;
88
+ int udpport;
89
+ char *srilm;
90
+ int srilm_order;
91
+ char *inter;
92
+ int verbose;
93
+ rel_time_t oldest_live; /* ignore existing items older than this */
94
+ bool managed; /* if 1, a tracker manages virtual buckets */
95
+ int evict_to_free;
96
+ char *socketpath; /* path to unix socket if using local socket */
97
+ int access; /* access mask (a la chmod) for unix domain socket */
98
+ double factor; /* chunk size growth factor */
99
+ int chunk_size;
100
+ int num_threads; /* number of libevent threads to run */
101
+ char prefix_delimiter; /* character that marks a key prefix (for stats) */
102
+ int detail_enabled; /* nonzero if we're collecting detailed stats */
103
+ };
104
+
105
+ extern struct stats stats;
106
+ extern struct settings settings;
107
+
108
+ #define ITEM_LINKED 1
109
+ #define ITEM_DELETED 2
110
+
111
+ /* temp */
112
+ #define ITEM_SLABBED 4
113
+
114
+ typedef struct _stritem {
115
+ struct _stritem *next;
116
+ struct _stritem *prev;
117
+ struct _stritem *h_next; /* hash chain next */
118
+ rel_time_t time; /* least recent access */
119
+ rel_time_t exptime; /* expire time */
120
+ int nbytes; /* size of data */
121
+ unsigned short refcount;
122
+ uint8_t nsuffix; /* length of flags-and-length string */
123
+ uint8_t it_flags; /* ITEM_* above */
124
+ uint8_t slabs_clsid;/* which slab class we're in */
125
+ uint8_t nkey; /* key length, w/terminating null and padding */
126
+ uint64_t cas_id; /* the CAS identifier */
127
+ void * end[];
128
+ /* then null-terminated key */
129
+ /* then " flags length\r\n" (no terminating null) */
130
+ /* then data with terminating \r\n (no terminating null; it's binary!) */
131
+ } item;
132
+
133
+ #define ITEM_key(item) ((char*)&((item)->end[0]))
134
+
135
+ /* warning: don't pass a function call (or any expression with side effects) to these macros, as they evaluate their argument more than once */
136
+ #define ITEM_suffix(item) ((char*) &((item)->end[0]) + (item)->nkey + 1)
137
+ #define ITEM_data(item) ((char*) &((item)->end[0]) + (item)->nkey + 1 + (item)->nsuffix)
138
+ #define ITEM_ntotal(item) (sizeof(struct _stritem) + (item)->nkey + 1 + (item)->nsuffix + (item)->nbytes)
139
+
140
+ enum conn_states {
141
+ conn_listening, /** the socket which listens for connections */
142
+ conn_read, /** reading in a command line */
143
+ conn_write, /** writing out a simple response */
144
+ conn_nread, /** reading in a fixed number of bytes */
145
+ conn_swallow, /** swallowing unnecessary bytes w/o storing */
146
+ conn_closing, /** closing this connection */
147
+ conn_mwrite, /** writing out many items sequentially */
148
+ };
149
+
150
+ #define NREAD_ADD 1
151
+ #define NREAD_SET 2
152
+ #define NREAD_REPLACE 3
153
+ #define NREAD_APPEND 4
154
+ #define NREAD_PREPEND 5
155
+ #define NREAD_CAS 6
156
+
157
+ typedef struct conn conn;
158
+ struct conn {
159
+ int sfd;
160
+ int state;
161
+ struct event event;
162
+ short ev_flags;
163
+ short which; /** which events were just triggered */
164
+
165
+ char *rbuf; /** buffer to read commands into */
166
+ char *rcurr; /** but if we parsed some already, this is where we stopped */
167
+ int rsize; /** total allocated size of rbuf */
168
+ int rbytes; /** how much data, starting from rcurr, do we have unparsed */
169
+
170
+ char *wbuf;
171
+ char *wcurr;
172
+ int wsize;
173
+ int wbytes;
174
+ int write_and_go; /** which state to go into after finishing current write */
175
+ void *write_and_free; /** free this memory after finishing writing */
176
+
177
+ char *ritem; /** when we read in an item's value, it goes here */
178
+ int rlbytes;
179
+
180
+ /* data for the nread state */
181
+
182
+ /**
183
+ * item is used to hold an item structure created after reading the command
184
+ * line of set/add/replace commands, but before we finished reading the actual
185
+ * data. The data is read into ITEM_data(item) to avoid extra copying.
186
+ */
187
+
188
+ void *item; /* for commands set/add/replace */
189
+ int item_comm; /* which one is it: set/add/replace */
190
+
191
+ /* data for the swallow state */
192
+ int sbytes; /* how many bytes to swallow */
193
+
194
+ /* data for the mwrite state */
195
+ struct iovec *iov;
196
+ int iovsize; /* number of elements allocated in iov[] */
197
+ int iovused; /* number of elements used in iov[] */
198
+
199
+ struct msghdr *msglist;
200
+ int msgsize; /* number of elements allocated in msglist[] */
201
+ int msgused; /* number of elements used in msglist[] */
202
+ int msgcurr; /* element in msglist[] being transmitted now */
203
+ int msgbytes; /* number of bytes in current msg */
204
+
205
+ item **ilist; /* list of items to write out */
206
+ int isize;
207
+ item **icurr;
208
+ int ileft;
209
+
210
+ char **suffixlist;
211
+ int suffixsize;
212
+ char **suffixcurr;
213
+ int suffixleft;
214
+
215
+ /* data for UDP clients */
216
+ bool udp; /* is this a UDP "connection" */
217
+ int request_id; /* Incoming UDP request ID, if this is a UDP "connection" */
218
+ struct sockaddr request_addr; /* Who sent the most recent request */
219
+ socklen_t request_addr_size;
220
+ unsigned char *hdrbuf; /* udp packet headers */
221
+ int hdrsize; /* number of headers' worth of space allocated */
222
+
223
+ int binary; /* are we in binary mode */
224
+ int bucket; /* bucket number for the next command, if running as
225
+ a managed instance. -1 (_not_ 0) means invalid. */
226
+ int gen; /* generation requested for the bucket */
227
+ bool noreply; /* True if the reply should not be sent. */
228
+ conn *next; /* Used for generating a list of conn structures */
229
+ };
230
+
231
+ /* number of virtual buckets for a managed instance */
232
+ #define MAX_BUCKETS 32768
233
+
234
+ /* current time as a rel_time_t, i.e. relative to server start (updated periodically) */
235
+ extern volatile rel_time_t current_time;
236
+
237
+ /*
238
+ * Functions
239
+ */
240
+
241
+ conn *do_conn_from_freelist(void); /* takes no arguments; (void) makes this a real prototype, matching mt_conn_from_freelist(void) */
242
+ bool do_conn_add_to_freelist(conn *c);
243
+ conn *conn_new(const int sfd, const int init_state, const int event_flags, const int read_buffer_size, const bool is_udp, struct event_base *base);
244
+
245
+
246
+ #include "stats.h"
247
+ //#include "slabs.h"
248
+ //#include "assoc.h"
249
+ //#include "items.h"
250
+ //#include "memcached_dtrace.h"
251
+
252
+ /*
253
+ * In multithreaded mode, we wrap certain functions with lock management and
254
+ * replace the logic of some other functions. All wrapped functions have
255
+ * "mt_" and "do_" variants. In multithreaded mode, the plain version of a
256
+ * function is #define-d to the "mt_" variant, which often just grabs a
257
+ * lock and calls the "do_" function. In singlethreaded mode, the "do_"
258
+ * function is called directly.
259
+ *
260
+ * Functions such as the libevent-related calls that need to do cross-thread
261
+ * communication in multithreaded mode (rather than actually doing the work
262
+ * in the current thread) are called via "dispatch_" frontends, which are
263
+ * also #define-d to directly call the underlying code in singlethreaded mode.
264
+ */
265
+ #ifdef USE_THREADS
266
+
267
+ void thread_init(int nthreads, struct event_base *main_base);
268
+ int dispatch_event_add(int thread, conn *c);
269
+ void dispatch_conn_new(int sfd, int init_state, int event_flags, int read_buffer_size, int is_udp);
270
+
271
+ /* Lock wrappers for cache functions that are called from main loop. */
272
+ char *mt_add_delta(conn *c, item *item, const int incr, const int64_t delta,
273
+ char *buf);
274
+ void mt_assoc_move_next_bucket(void);
275
+ conn *mt_conn_from_freelist(void);
276
+ bool mt_conn_add_to_freelist(conn *c);
277
+ char *mt_suffix_from_freelist(void);
278
+ bool mt_suffix_add_to_freelist(char *s);
279
+ char *mt_defer_delete(item *it, time_t exptime);
280
+ int mt_is_listen_thread(void);
281
+ item *mt_item_alloc(char *key, size_t nkey, int flags, rel_time_t exptime, int nbytes);
282
+ char *mt_item_cachedump(const unsigned int slabs_clsid, const unsigned int limit, unsigned int *bytes);
283
+ void mt_item_flush_expired(void);
284
+ item *mt_item_get_notedeleted(const char *key, const size_t nkey, bool *delete_locked);
285
+ int mt_item_link(item *it);
286
+ void mt_item_remove(item *it);
287
+ int mt_item_replace(item *it, item *new_it);
288
+ char *mt_item_stats(int *bytes);
289
+ char *mt_item_stats_sizes(int *bytes);
290
+ void mt_item_unlink(item *it);
291
+ void mt_item_update(item *it);
292
+ void mt_run_deferred_deletes(void);
293
+ void *mt_slabs_alloc(size_t size, unsigned int id);
294
+ void mt_slabs_free(void *ptr, size_t size, unsigned int id);
295
+ int mt_slabs_reassign(unsigned char srcid, unsigned char dstid);
296
+ char *mt_slabs_stats(int *buflen);
297
+ void mt_stats_lock(void);
298
+ void mt_stats_unlock(void);
299
+ int mt_store_item(item *item, int comm);
300
+
301
+
302
+ # define add_delta(c,x,y,z,a) mt_add_delta(c,x,y,z,a)
303
+ # define assoc_move_next_bucket() mt_assoc_move_next_bucket()
304
+ # define conn_from_freelist() mt_conn_from_freelist()
305
+ # define conn_add_to_freelist(x) mt_conn_add_to_freelist(x)
306
+ # define suffix_from_freelist() mt_suffix_from_freelist()
307
+ # define suffix_add_to_freelist(x) mt_suffix_add_to_freelist(x)
308
+ # define defer_delete(x,y) mt_defer_delete(x,y)
309
+ # define is_listen_thread() mt_is_listen_thread()
310
+ # define item_alloc(x,y,z,a,b) mt_item_alloc(x,y,z,a,b)
311
+ # define item_cachedump(x,y,z) mt_item_cachedump(x,y,z)
312
+ # define item_flush_expired() mt_item_flush_expired()
313
+ # define item_get_notedeleted(x,y,z) mt_item_get_notedeleted(x,y,z)
314
+ # define item_link(x) mt_item_link(x)
315
+ # define item_remove(x) mt_item_remove(x)
316
+ # define item_replace(x,y) mt_item_replace(x,y)
317
+ # define item_stats(x) mt_item_stats(x)
318
+ # define item_stats_sizes(x) mt_item_stats_sizes(x)
319
+ # define item_update(x) mt_item_update(x)
320
+ # define item_unlink(x) mt_item_unlink(x)
321
+ # define run_deferred_deletes() mt_run_deferred_deletes()
322
+ # define slabs_alloc(x,y) mt_slabs_alloc(x,y)
323
+ # define slabs_free(x,y,z) mt_slabs_free(x,y,z)
324
+ # define slabs_reassign(x,y) mt_slabs_reassign(x,y)
325
+ # define slabs_stats(x) mt_slabs_stats(x)
326
+ # define store_item(x,y) mt_store_item(x,y)
327
+
328
+ # define STATS_LOCK() mt_stats_lock()
329
+ # define STATS_UNLOCK() mt_stats_unlock()
330
+
331
+ #else /* !USE_THREADS */
332
+
333
+ # define add_delta(c,x,y,z,a) do_add_delta(c,x,y,z,a)
334
+ # define assoc_move_next_bucket() do_assoc_move_next_bucket()
335
+ # define conn_from_freelist() do_conn_from_freelist()
336
+ # define conn_add_to_freelist(x) do_conn_add_to_freelist(x)
337
+ # define suffix_from_freelist() do_suffix_from_freelist()
338
+ # define suffix_add_to_freelist(x) do_suffix_add_to_freelist(x)
339
+ # define defer_delete(x,y) do_defer_delete(x,y)
340
+ # define dispatch_conn_new(x,y,z,a,b) conn_new(x,y,z,a,b,main_base)
341
+ # define dispatch_event_add(t,c) event_add(&(c)->event, 0)
342
+ # define is_listen_thread() 1
343
+ # define item_alloc(x,y,z,a,b) do_item_alloc(x,y,z,a,b)
344
+ # define item_cachedump(x,y,z) do_item_cachedump(x,y,z)
345
+ # define item_flush_expired() do_item_flush_expired()
346
+ # define item_get_notedeleted(x,y,z) do_item_get_notedeleted(x,y,z)
347
+ # define item_link(x) do_item_link(x)
348
+ # define item_remove(x) do_item_remove(x)
349
+ # define item_replace(x,y) do_item_replace(x,y)
350
+ # define item_stats(x) do_item_stats(x)
351
+ # define item_stats_sizes(x) do_item_stats_sizes(x)
352
+ # define item_unlink(x) do_item_unlink(x)
353
+ # define item_update(x) do_item_update(x)
354
+ # define run_deferred_deletes() do_run_deferred_deletes()
355
+ # define slabs_alloc(x,y) do_slabs_alloc(x,y)
356
+ # define slabs_free(x,y,z) do_slabs_free(x,y,z)
357
+ # define slabs_reassign(x,y) do_slabs_reassign(x,y)
358
+ # define slabs_stats(x) do_slabs_stats(x)
359
+ # define store_item(x,y) do_store_item(x,y)
360
+ # define thread_init(x,y) 0
361
+
362
+ # define STATS_LOCK() /**/
363
+ # define STATS_UNLOCK() /**/
364
+
365
+ #endif /* !USE_THREADS */
366
+
367
+ /* If supported, give compiler hints for branch prediction. */
368
+ #if !defined(__GNUC__) || (__GNUC__ == 2 && __GNUC_MINOR__ < 96)
369
+ #define __builtin_expect(x, expected_value) (x)
370
+ #endif
371
+
372
+ #define likely(x) __builtin_expect((x),1)
373
+ #define unlikely(x) __builtin_expect((x),0)
374
+
375
+ #endif
mosesdecoder/contrib/lmserver/stamp-h1 ADDED
@@ -0,0 +1 @@
 
 
1
+ timestamp for config.h
mosesdecoder/contrib/mert-moses-multi.pl ADDED
@@ -0,0 +1,1529 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/perl -w
2
+ # $Id$
3
+ # Usage:
4
+ # mert-moses.pl <foreign> <english> <decoder-executable> <decoder-config>
5
+ # For other options see below or run 'mert-moses.pl --help'
6
+
7
+ #
8
+ # NB: This is a variant of mert-moses.pl for use with the interpolated scorer
9
+ # (MergeScorer) described in the following paper:
10
+ #
11
+ # "Optimising Multiple Metrics with MERT" by Christophe Servan and Holger Schwenk,
12
+ # Prague Bulletin of Mathematical Linguistics 96 (2011) p109-117
13
+ # http://www-lium.univ-lemans.fr/~servan/publications/Servan_PBML_2011.pdf
14
+ #
15
+ # If you are not using MergeScorer, then you should use the mert-moses.pl script instead
16
+ #
17
+
18
+
19
+ # Notes:
20
+ # <foreign> and <english> should be raw text files, one sentence per line
21
+ # <english> can be a prefix, in which case the files <english>0, <english>1, etc. are used
22
+
23
+ # Excerpts from revision history
24
+
25
+ # Sept 2011 multi-threaded mert (Barry Haddow)
26
+ # 3 Aug 2011 Added random directions, historic best, pairwise ranked (PK)
27
+ # Jul 2011 simplifications (Ondrej Bojar)
28
+ # -- rely on moses' -show-weights instead of parsing moses.ini
29
+ # ... so moses is also run once *before* mert starts, checking
30
+ # the model to some extent
31
+ # -- got rid of the 'triples' mess;
32
+ # use --range to supply bounds for random starting values:
33
+ # --range tm:-3..3 --range lm:-3..3
34
+ # 5 Aug 2009 Handling of different reference length policies (shortest, average, closest) for BLEU
35
+ # and case-sensitive/insensitive evaluation (Nicola Bertoldi)
36
+ # 5 Jun 2008 Forked previous version to support new mert implementation.
37
+ # 13 Feb 2007 Better handling of default values for lambda, now works with multiple
38
+ # models and lexicalized reordering
39
+ # 11 Oct 2006 Handle different input types through parameter --inputype=[0|1]
40
+ # (0 for text, 1 for confusion network, default is 0) (Nicola Bertoldi)
41
+ # 10 Oct 2006 Allow skip of filtering of phrase tables (--no-filter-phrase-table)
42
+ # useful if binary phrase tables are used (Nicola Bertoldi)
43
+ # 28 Aug 2006 Use either closest or average or shortest (default) reference
44
+ # length as effective reference length
45
+ # Use either normalization or not (default) of texts (Nicola Bertoldi)
46
+ # 31 Jul 2006 move gzip run*.out to avoid failure with restarts
47
+ # adding default paths
48
+ # 29 Jul 2006 run-filter, score-nbest and mert run on the queue (Nicola; Ondrej had to type it in again)
49
+ # 28 Jul 2006 attempt at foolproof usage, strong checking of input validity, merged the parallel and nonparallel version (Ondrej Bojar)
50
+ # 27 Jul 2006 adding the safesystem() function to handle process failure
51
+ # 22 Jul 2006 fixed a bug about handling relative path of configuration file (Nicola Bertoldi)
52
+ # 21 Jul 2006 adapted for Moses-in-parallel (Nicola Bertoldi)
53
+ # 18 Jul 2006 adapted for Moses and cleaned up (PK)
54
+ # 21 Jan 2005 unified various versions, thorough cleanup (DWC)
55
+ # now indexing accumulated n-best list solely by feature vectors
56
+ # 14 Dec 2004 reimplemented find_threshold_points in C (NMD)
57
+ # 25 Oct 2004 Use either average or shortest (default) reference
58
+ # length as effective reference length (DWC)
59
+ # 13 Oct 2004 Use alternative decoders (DWC)
60
+ # Original version by Philipp Koehn
61
+
62
+ use FindBin qw($RealBin);
63
+ use File::Basename;
64
+ use File::Path;
65
+ my $SCRIPTS_ROOTDIR = $RealBin;
66
+ $SCRIPTS_ROOTDIR =~ s/\/training$//;
67
+ $SCRIPTS_ROOTDIR = $ENV{"SCRIPTS_ROOTDIR"} if defined($ENV{"SCRIPTS_ROOTDIR"});
68
+
69
+ ## We preserve this bit of comments to keep the traditional weight ranges.
70
+ # "w" => [ [ 0.0, -1.0, 1.0 ] ], # word penalty
71
+ # "d" => [ [ 1.0, 0.0, 2.0 ] ], # lexicalized reordering model
72
+ # "lm" => [ [ 1.0, 0.0, 2.0 ] ], # language model
73
+ # "g" => [ [ 1.0, 0.0, 2.0 ], # generation model
74
+ # [ 1.0, 0.0, 2.0 ] ],
75
+ # "tm" => [ [ 0.3, 0.0, 0.5 ], # translation model
76
+ # [ 0.2, 0.0, 0.5 ],
77
+ # [ 0.3, 0.0, 0.5 ],
78
+ # [ 0.2, 0.0, 0.5 ],
79
+ # [ 0.0,-1.0, 1.0 ] ], # ... last weight is phrase penalty
80
+ # "lex"=> [ [ 0.1, 0.0, 0.2 ] ], # global lexical model
81
+ # "I" => [ [ 0.0,-1.0, 1.0 ] ], # input lattice scores
82
+
83
+
84
+
85
# moses.ini uses FULL names for the lambdas, while this training script
# internally (and on its command line) uses ABBR names.  Each entry below is
# one "abbr=full" pair; the two hashes give the lookup in either direction.
my @ABBR_FULL_MAP = qw(
    d=weight-d
    lm=weight-l
    tm=weight-t
    w=weight-w
    g=weight-generation
    lex=weight-lex
    I=weight-i
);
my (%ABBR2FULL, %FULL2ABBR);
foreach my $pair (@ABBR_FULL_MAP) {
    my ($abbr, $full) = split /=/, $pair, 2;
    $ABBR2FULL{$abbr} = $full;
    $FULL2ABBR{$full} = $abbr;
}
91
+
92
+ my $minimum_required_change_in_weights = 0.00001;
93
+ # stop if no lambda changes more than this
94
+
95
+ my $verbose = 0;
96
+ my $usage = 0; # request for --help
97
+ my $___WORKING_DIR = "mert-work";
98
+ my $___DEV_F = undef; # required, input text to decode
99
+ my $___DEV_E = undef; # required, basename of files with references
100
+ my $___DECODER = undef; # required, pathname to the decoder executable
101
+ my $___CONFIG = undef; # required, pathname to startup ini file
102
+ my $___N_BEST_LIST_SIZE = 100;
103
+ my $queue_flags = "-hard"; # extra parameters for parallelizer
104
+ # the -l ws0ssmt was relevant only to JHU 2006 workshop
105
+ my $___JOBS = undef; # if parallel, number of jobs to use (undef or 0 -> serial)
106
+ my $___DECODER_FLAGS = ""; # additional parameters to pass to the decoder
107
+ my $continue = 0; # should we try to continue from the last saved step?
108
+ my $skip_decoder = 0; # and should we skip the first decoder run (assuming we got interrupted during mert)
109
+ my $___FILTER_PHRASE_TABLE = 1; # filter phrase table
110
+ my $___PREDICTABLE_SEEDS = 0;
111
+ my $___START_WITH_HISTORIC_BESTS = 0; # use best settings from all previous iterations as starting points [Foster&Kuhn,2009]
112
+ my $___RANDOM_DIRECTIONS = 0; # search in random directions only
113
+ my $___NUM_RANDOM_DIRECTIONS = 0; # number of random directions, also works with default optimizer [Cer&al.,2008]
114
+ my $___PAIRWISE_RANKED_OPTIMIZER = 0; # use Hopkins&May[2011]
115
+ my $___PRO_STARTING_POINT = 0; # get a starting point from pairwise ranked optimizer
116
+ my $___RANDOM_RESTARTS = 20;
117
+ my $___HISTORIC_INTERPOLATION = 0; # interpolate optimize weights with previous iteration's weights [Hopkins&May,2011,5.4.3]
118
+ my $__THREADS = 0;
119
+
120
+ # Parameter for effective reference length when computing BLEU score
121
+ # Default is to use shortest reference
122
+ # Use "--shortest" to use shortest reference length
123
+ # Use "--average" to use average reference length
124
+ # Use "--closest" to use closest reference length
125
+ # Only one between --shortest, --average and --closest can be set
126
+ # Specifying more than one of them is an error: the script dies
127
+ my $___SHORTEST = 0;
128
+ my $___AVERAGE = 0;
129
+ my $___CLOSEST = 0;
130
+
131
+ # Use "--nocase" to compute case-insensitive scores
132
+ my $___NOCASE = 0;
133
+
134
+ # Use "--nonorm" to not normalize translations before computing scores
135
+ my $___NONORM = 0;
136
+
137
+ # set 0 if input type is text, 1 if it is a confusion network, 2 if it is a lattice
138
+ my $___INPUTTYPE = 0;
139
+
140
+
141
+ my $mertdir = undef; # path to new mert directory
142
+ my $mertargs = undef; # args to pass through to mert & extractor
143
+ my $mertmertargs = undef; # args to pass through to mert only
144
+ my $filtercmd = undef; # path to filter-model-given-input.pl
145
+ my $filterfile = undef;
146
+ my $qsubwrapper = undef;
147
+ my $moses_parallel_cmd = undef;
148
+ my $scorer_config = "BLEU:1";
149
+ my $old_sge = 0; # assume sge<6.0
150
+ my $___CONFIG_ORIG = undef; # pathname to startup ini file before filtering
151
+ my $___ACTIVATE_FEATURES = undef; # comma-separated (or blank-separated) list of features to work on
152
+ # if undef work on all features
153
+ # (others are fixed to the starting values)
154
+ my $___RANGES = undef;
155
+ my $prev_aggregate_nbl_size = -1; # number of previous step to consider when loading data (default =-1)
156
+ # -1 means all previous, i.e. from iteration 1
157
+ # 0 means no previous data, i.e. from actual iteration
158
+ # 1 means 1 previous data , i.e. from the actual iteration and from the previous one
159
+ # and so on
160
+ my $maximum_iterations = 25;
161
+
162
+ use strict;
163
+ use Getopt::Long;
164
+ GetOptions(
165
+ "working-dir=s" => \$___WORKING_DIR,
166
+ "input=s" => \$___DEV_F,
167
+ "inputtype=i" => \$___INPUTTYPE,
168
+ "refs=s" => \$___DEV_E,
169
+ "decoder=s" => \$___DECODER,
170
+ "config=s" => \$___CONFIG,
171
+ "nbest=i" => \$___N_BEST_LIST_SIZE,
172
+ "queue-flags=s" => \$queue_flags,
173
+ "jobs=i" => \$___JOBS,
174
+ "decoder-flags=s" => \$___DECODER_FLAGS,
175
+ "continue" => \$continue,
176
+ "skip-decoder" => \$skip_decoder,
177
+ "shortest" => \$___SHORTEST,
178
+ "average" => \$___AVERAGE,
179
+ "closest" => \$___CLOSEST,
180
+ "nocase" => \$___NOCASE,
181
+ "nonorm" => \$___NONORM,
182
+ "help" => \$usage,
183
+ "verbose" => \$verbose,
184
+ "mertdir=s" => \$mertdir,
185
+ "mertargs=s" => \$mertargs,
186
+ "mertmertargs=s" => \$mertmertargs,
187
+ "rootdir=s" => \$SCRIPTS_ROOTDIR,
188
+ "filtercmd=s" => \$filtercmd, # allow to override the default location
189
+ "filterfile=s" => \$filterfile, # input to filtering script (useful for lattices/confnets)
190
+ "qsubwrapper=s" => \$qsubwrapper, # allow to override the default location
191
+ "mosesparallelcmd=s" => \$moses_parallel_cmd, # allow to override the default location
192
+ "old-sge" => \$old_sge, #passed to moses-parallel
193
+ "filter-phrase-table!" => \$___FILTER_PHRASE_TABLE, # (dis)allow of phrase tables
194
+ "predictable-seeds" => \$___PREDICTABLE_SEEDS, # make random restarts deterministic
195
+ "historic-bests" => \$___START_WITH_HISTORIC_BESTS, # use best settings from all previous iterations as starting points
196
+ "random-directions" => \$___RANDOM_DIRECTIONS, # search only in random directions
197
+ "number-of-random-directions=i" => \$___NUM_RANDOM_DIRECTIONS, # number of random directions
198
+ "random-restarts=i" => \$___RANDOM_RESTARTS, # number of random restarts
199
+ "activate-features=s" => \$___ACTIVATE_FEATURES, #comma-separated (or blank-separated) list of features to work on (others are fixed to the starting values)
200
+ "range=s@" => \$___RANGES,
201
+ "prev-aggregate-nbestlist=i" => \$prev_aggregate_nbl_size, #number of previous step to consider when loading data (default =-1, i.e. all previous)
202
+ "maximum-iterations=i" => \$maximum_iterations,
203
+ "pairwise-ranked" => \$___PAIRWISE_RANKED_OPTIMIZER,
204
+ "pro-starting-point" => \$___PRO_STARTING_POINT,
205
+ "historic-interpolation=f" => \$___HISTORIC_INTERPOLATION,
206
+ "threads=i" => \$__THREADS,
207
+ "sc-config=s" => \$scorer_config
208
+ ) or exit(1);
209
+
210
# The four required parameters may be given either through the --options
# above or positionally, in this order:
#   input_file references_basename decoder_executable decoder_ini
# Positional values are only consumed when exactly four arguments remain.
($___DEV_F, $___DEV_E, $___DECODER, $___CONFIG) = splice(@ARGV, 0, 4)
    if scalar @ARGV == 4;
219
+
220
+ if ($usage || !defined $___DEV_F || !defined $___DEV_E || !defined $___DECODER || !defined $___CONFIG) {
221
+ print STDERR "usage: $0 input-text references decoder-executable decoder.ini
222
+ Options:
223
+ --working-dir=mert-dir ... where all the files are created
224
+ --nbest=100 ... how big nbestlist to generate
225
+ --jobs=N ... set this to anything to run moses in parallel
226
+ --mosesparallelcmd=STR ... use a different script instead of moses-parallel
227
+ --queue-flags=STRING ... anything you with to pass to qsub, eg.
228
+ '-l ws06osssmt=true'. The default is: '-hard'
229
+ To reset the parameters, please use
230
+ --queue-flags=' '
231
+ (i.e. a space between the quotes).
232
+ --decoder-flags=STRING ... extra parameters for the decoder
233
+ --continue ... continue from the last successful iteration
234
+ --skip-decoder ... skip the decoder run for the first time,
235
+ assuming that we got interrupted during
236
+ optimization
237
+ --shortest --average --closest
238
+ ... Use shortest/average/closest reference length
239
+ as effective reference length (mutually exclusive)
240
+ --nocase ... Do not preserve case information; i.e.
241
+ case-insensitive evaluation (default is false).
242
+ --nonorm ... Do not use text normalization (flag is not active,
243
+ i.e. text is NOT normalized)
244
+ --filtercmd=STRING ... path to filter-model-given-input.pl
245
+ --filterfile=STRING ... path to alternative to input-text for filtering
246
+ model. useful for lattice decoding
247
+ --rootdir=STRING ... where do helpers reside (if not given explicitly)
248
+ --mertdir=STRING ... path to new mert implementation
249
+ --mertargs=STRING ... extra args for mert, eg. to specify scorer
250
+ --mertmertargs=STRING ... extra args for mert only,
251
+ --scorenbestcmd=STRING ... path to score-nbest.py
252
+ --old-sge ... passed to parallelizers, assume Grid Engine < 6.0
253
+ --inputtype=[0|1|2] ... Handle different input types: (0 for text,
254
+ 1 for confusion network, 2 for lattices,
255
+ default is 0)
256
+ --no-filter-phrase-table ... disallow filtering of phrase tables
257
+ (useful if binary phrase tables are available)
258
+ --random-restarts=INT ... number of random restarts (default: 20)
259
+ --predictable-seeds ... provide predictable seeds to mert so that random
260
+ restarts are the same on every run
261
+ --range=tm:0..1,-1..1 ... specify min and max value for some features
262
+ --range can be repeated as needed.
263
+ The order of the various --range specifications
264
+ is important only within a feature name.
265
+ E.g.:
266
+ --range=tm:0..1,-1..1 --range=tm:0..2
267
+ is identical to:
268
+ --range=tm:0..1,-1..1,0..2
269
+ but not to:
270
+ --range=tm:0..2 --range=tm:0..1,-1..1
271
+ --activate-features=STRING ... comma-separated list of features to optimize,
272
+ others are fixed to the starting values
273
+ default: optimize all features
274
+ example: tm_0,tm_4,d_0
275
+ --prev-aggregate-nbestlist=INT ... number of previous step to consider when
276
+ loading data (default = $prev_aggregate_nbl_size)
277
+ -1 means all previous, i.e. from iteration 1
278
+ 0 means no previous data, i.e. only the
279
+ current iteration
280
+ N means this and N previous iterations
281
+
282
+ --maximum-iterations=ITERS ... Maximum number of iterations. Default: $maximum_iterations
283
+ --random-directions ... search only in random directions
284
+ --number-of-random-directions=int ... number of random directions
285
+ (also works with regular optimizer, default: 0)
286
+ --pairwise-ranked ... Use PRO for optimisation (Hopkins and May, emnlp 2011)
287
+ --pro-starting-point ... Use PRO to get a starting point for MERT
288
+ --threads=NUMBER ... Use multi-threaded mert (must be compiled in).
289
+ --historic-interpolation ... Interpolate optimized weights with prior iterations' weight
290
+ (parameter sets factor [0;1] given to current weights)
291
+ --sc-config=STRING ... extra option to specify multiscoring.
292
+ ";
293
+ exit 1;
294
+ }
295
+
296
+
297
+ # Check validity of input parameters and set defaults if needed
298
+
299
+ print STDERR "Using WORKING_DIR: $___WORKING_DIR\n";
300
+ print STDERR "Using SCRIPTS_ROOTDIR: $SCRIPTS_ROOTDIR\n";
301
+
302
# Path of the script that filters the phrase tables for the given input.
$filtercmd = "$SCRIPTS_ROOTDIR/training/filter-model-given-input.pl"
    if !defined $filtercmd;

# The filter script only has to exist when filtering is actually requested.
# (The condition used to be inverted: it fired only with
# --no-filter-phrase-table, i.e. exactly when the script is never run, and
# then advised the user to pass the option they had already given.)
# --filtercmd may carry arguments after the program path, so only the first
# space-separated word is tested for executability.
{
    my ($filter_program) = split(/ /, $filtercmd);
    if ($___FILTER_PHRASE_TABLE && ! -x $filter_program) {
        print STDERR "Filtering command not found: $filtercmd.\n";
        print STDERR "Use --filtercmd=PATH to specify a valid one or --no-filter-phrase-table\n";
        exit 1;
    }
}
310
+
311
+ $qsubwrapper="$SCRIPTS_ROOTDIR/generic/qsub-wrapper.pl" if !defined $qsubwrapper;
312
+
313
+ $moses_parallel_cmd = "$SCRIPTS_ROOTDIR/generic/moses-parallel.pl"
314
+ if !defined $moses_parallel_cmd;
315
+
316
# Fall back to /usr/bin when --mertdir was not given.
unless (defined $mertdir) {
    $mertdir = "/usr/bin";
    print STDERR "Assuming --mertdir=$mertdir\n";
}

# Both binaries of the mert implementation must be present and executable.
my $mert_extract_cmd = "$mertdir/extractor";
my $mert_mert_cmd = "$mertdir/mert";
-x $mert_extract_cmd or die "Not executable: $mert_extract_cmd";
-x $mert_mert_cmd or die "Not executable: $mert_mert_cmd";

# The PRO modes need the megam optimizer; when it is missing, try to fetch it
# into $mertdir and give up if that does not yield an executable.
my $pro_optimizer = "$mertdir/megam_i686.opt"; # or set to your installation
my $pro_requested = ($___PAIRWISE_RANKED_OPTIMIZER || $___PRO_STARTING_POINT);
if ($pro_requested && ! -x $pro_optimizer) {
    print "did not find $pro_optimizer, installing it in $mertdir\n";
    # NOTE(review): this downloads a binary over plain HTTP and marks it
    # executable without any integrity check.
    `cd $mertdir; wget http://www.cs.utah.edu/~hal/megam/megam_i686.opt.gz;`;
    `gunzip $pro_optimizer.gz`;
    `chmod +x $pro_optimizer`;
    -x $pro_optimizer
        or die("ERROR: Installation of megam_i686.opt failed! Install by hand from http://www.cs.utah.edu/~hal/megam/");
}
335
+
336
$mertargs = "" unless defined $mertargs;

# A "--scconfig VALUE" given inside --mertargs is lifted out of $mertargs so
# that it can be merged with the settings derived from our own options; its
# commas become spaces while the pieces are being collected.
my $scconfig;
if ($mertargs =~ s/\-\-scconfig\s+(.+?)(\s|$)//) {
    $scconfig = $1;
    $scconfig =~ s/\,/ /g;
}

# handling reference length strategy: at most one of the three may be set
die "You can specify just ONE reference length strategy (closest or shortest or average) not both\n"
    if ($___CLOSEST + $___AVERAGE + $___SHORTEST) > 1;

$scconfig .= $___SHORTEST ? " reflen:shortest"
           : $___AVERAGE  ? " reflen:average"
           : $___CLOSEST  ? " reflen:closest"
           :                "";

# handling case-insensitive flag
$scconfig .= $___NOCASE ? " case:false" : " case:true";

# Trim, then turn the space-separated pieces into one comma-separated value.
$scconfig =~ s/^\s+//;
$scconfig =~ s/\s+$//;
$scconfig =~ s/\s+/,/g;

$scconfig = "--scconfig $scconfig" if ($scconfig);

# Arguments for the extractor: shared args plus the scorer configuration.
my $mert_extract_args = "$mertargs $scconfig";

$mertmertargs = "" unless defined $mertmertargs;

# Arguments for mert itself: shared + mert-only args, with the first
# -binary/-b flag removed, plus the scorer configuration and, when requested,
# the list of features to optimize.
my $mert_mert_args = "$mertargs $mertmertargs";
$mert_mert_args =~ s/\-+(binary|b)\b//;
$mert_mert_args .= " $scconfig";
$mert_mert_args .= " -o \"$___ACTIVATE_FEATURES\"" if $___ACTIVATE_FEATURES;
379
+
380
# Verify that every external program we are going to call is executable.
# --filtercmd may contain arguments, so only its first word is the program.
my ($just_cmd_filtercmd,$x) = split(/ /,$filtercmd);
# The filter script is only run when phrase-table filtering is enabled, so a
# missing script must not abort a --no-filter-phrase-table run.
die "Not executable: $just_cmd_filtercmd"
    if $___FILTER_PHRASE_TABLE && ! -x $just_cmd_filtercmd;
# The parallelization helpers are only checked when --jobs was given.
die "Not executable: $moses_parallel_cmd" if defined $___JOBS && ! -x $moses_parallel_cmd;
die "Not executable: $qsubwrapper" if defined $___JOBS && ! -x $qsubwrapper;
die "Not executable: $___DECODER" if ! -x $___DECODER;
385
+
386
# Input text: resolve via ensure_full_path() and insist that it exists.
my $input_abs = ensure_full_path($___DEV_F);
-e $input_abs
    or die "File not found: $___DEV_F (interpreted as $input_abs).";
$___DEV_F = $input_abs;

# Option to pass to qsubwrapper and moses-parallel
my $pass_old_sge = "";
$pass_old_sge = "-old-sge" if $old_sge;

# Decoder: resolve the path and re-check executability of the resolved path.
my $decoder_abs = ensure_full_path($___DECODER);
-x $decoder_abs
    or die "File not executable: $___DECODER (interpreted as $decoder_abs).";
$___DECODER = $decoder_abs;

# Reference translations: either one file named exactly as given, or a
# series of files <basename>0, <basename>1, ... collected until the first gap.
my $ref_abs = ensure_full_path($___DEV_E);
my @references;
if (-e $ref_abs) {
    @references = ($ref_abs);
}
else {
    for (my $part = 0; -e $ref_abs.$part; $part++) {
        push @references, $ref_abs.$part;
    }
    die("Reference translations not found: $___DEV_E (interpreted as $ref_abs)")
        unless @references;
}

# Decoder configuration: resolve the path and insist that it exists.
my $config_abs = ensure_full_path($___CONFIG);
-e $config_abs
    or die "File not found: $___CONFIG (interpreted as $config_abs).";
$___CONFIG = $config_abs;
419
+
420
# moses should use our config: the model files must come from the ini file
# given via --config, so any flag in --decoder-flags that would substitute a
# config or model file is rejected.  A flag is recognised when it follows the
# start of the string or whitespace and is followed by a space.
if ($___DECODER_FLAGS =~ /(^|\s)-(config|f|ttable-file|t|distortion-file|generation-file|lmodel-file|global-lexical-file) /) {
    # The message now names -global-lexical-file too, which was always
    # rejected here but was missing from the explanation.
    die "It is forbidden to supply any of -config, -ttable-file, -distortion-file, -generation-file, -lmodel-file or -global-lexical-file in the --decoder-flags.\nPlease use only the --config option to give the config file that lists all the supplementary files.";
}
430
+
431
# Weights are normalized in the later steps, so the initial LAMBDAs are
# flagged for normalization as well.
my $need_to_normalize = 1;

# Remember the current directory: prefer the output of `pawd`, and fall back
# to `pwd` when that yields nothing.  Then create the working directory.
my $cwd = `pawd 2>/dev/null` || `pwd`;
chomp($cwd);

mkpath($___WORKING_DIR);
441
+
442
+ {
443
+ # open local scope
444
+
445
+ #chdir to the working directory
446
+ chdir($___WORKING_DIR) or die "Can't chdir to $___WORKING_DIR";
447
+
448
+ # fixed file names
449
+ my $mert_outfile = "mert.out";
450
+ my $mert_logfile = "mert.log";
451
+ my $weights_in_file = "init.opt";
452
+ my $weights_out_file = "weights.txt";
453
+
454
+ # set start run
455
+ my $start_run = 1;
456
+ my $bestpoint = undef;
457
+ my $devbleu = undef;
458
+ my $sparse_weights_file = undef;
459
+
460
+ my $prev_feature_file = undef;
461
+ my $prev_score_file = undef;
462
+ my $prev_init_file = undef;
463
+
464
if (!$___FILTER_PHRASE_TABLE) {
    # do not filter phrase tables (useful if binary phrase tables are available)
    # use the original configuration file
    $___CONFIG_ORIG = $___CONFIG;
}
else {
    my $outdir = "filtered";
    my $filtered_ini = "$outdir/moses.ini";
    if (! -e $filtered_ini) {
        # filter the phrase tables with respect to the input (or to the
        # alternative file given with --filterfile)
        print STDERR "filtering the phrase tables... ".`date`;
        my $___FILTER_F = defined $filterfile ? $filterfile : $___DEV_F;
        &submit_or_exec("$filtercmd ./$outdir $___CONFIG $___FILTER_F","filterphrases.out","filterphrases.err");
    }
    else {
        print STDERR "Assuming the tables are already filtered, reusing $outdir/moses.ini\n";
    }

    # keep the startup ini path, then point the decoder at the filtered model
    $___CONFIG_ORIG = $___CONFIG;
    $___CONFIG = $filtered_ini;
}
488
+
489
+ # we run moses to check validity of moses.ini and to obtain all the feature
490
+ # names
491
+ my $featlist = get_featlist_from_moses($___CONFIG);
492
+ $featlist = insert_ranges_to_featlist($featlist, $___RANGES);
493
+
494
# Mark which features are enabled.  With --activate-features, a feature is
# addressed as NAME_K, where K counts the earlier occurrences of the same
# name in the feature list; everything not listed gets an undefined (false)
# flag.  Without the option every feature is enabled.
my $last_feature_idx = $#{$featlist->{"names"}};
if (defined $___ACTIVATE_FEATURES) {
    my %enabled = map { ($_, 1) } split /[, ]+/, $___ACTIVATE_FEATURES;
    my %occurrences;
    foreach my $idx (0 .. $last_feature_idx) {
        my $name = $featlist->{"names"}->[$idx];
        # post-increment of a not-yet-seen name yields 0
        my $ordinal = $occurrences{$name}++;
        $featlist->{"enabled"}->[$idx] = $enabled{$name."_".$ordinal};
    }
}
else {
    $featlist->{"enabled"}->[$_] = 1 foreach (0 .. $last_feature_idx);
}
510
+
511
# Report every feature with its starting value, followed either by its range
# or by a note that it is not being optimized.
print STDERR "MERT starting values and ranges for random generation:\n";
foreach my $idx (0 .. $#{$featlist->{"names"}}) {
    my ($name, $val, $min, $max, $enabled) =
        map { $featlist->{$_}->[$idx] } ("names", "values", "mins", "maxs", "enabled");
    printf STDERR " %5s = %7.3f", $name, $val;
    unless ($enabled) {
        print STDERR " --- inactive, not optimized ---\n";
        next;
    }
    printf STDERR " (%5.2f .. %5.2f)\n", $min, $max;
}
525
+
526
# --continue: resume an interrupted optimization.
# Reads the number of the last finished step from finished_step.txt, checks
# that all per-run files needed to resume are present, collects the
# comma-separated lists of previous feature/score/init files, reloads the best
# weights of the last step and sets $start_run to the following step.
if ($continue) {
    # getting the last finished step
    print STDERR "Trying to continue an interrupted optimization.\n";
    # lexical handle and three-argument open instead of a global bareword handle
    open(my $step_fh, "<", "finished_step.txt")
        or die "Failed to find the step number, failed to read finished_step.txt";
    my $step = <$step_fh>;
    close($step_fh);
    chomp $step if defined $step;
    # An empty or garbled file would otherwise only surface later as a
    # confusing "run.features.dat was not found" style failure.
    die "Failed to find the step number, finished_step.txt does not contain a number"
        unless defined $step && $step =~ /^\d+$/;

    print STDERR "Last finished step is $step\n";

    # getting the first needed step: everything from step 1 when all previous
    # n-best lists are aggregated, otherwise only the configured window
    my $firststep = 1;
    if ($prev_aggregate_nbl_size != -1) {
        $firststep = $step - $prev_aggregate_nbl_size + 1;
        $firststep = 1 if $firststep <= 0;
    }

    # checking if all needed data are available
    if ($firststep <= $step) {
        print STDERR "First previous needed data index is $firststep\n";
        print STDERR "Checking whether all needed data (from step $firststep to step $step) are available\n";

        # Per-step files, each paired with the list variable it extends.
        my @per_step_files = (
            [ "features.dat",   \$prev_feature_file ],
            [ "scores.dat",     \$prev_score_file   ],
            [ $weights_in_file, \$prev_init_file    ],
        );
        for my $prevstep ($firststep .. $step) {
            print STDERR "Checking whether data of step $prevstep are available\n";
            foreach my $entry (@per_step_files) {
                my ($suffix, $list_ref) = @$entry;
                my $file = "run$prevstep.$suffix";
                die "Can't start from step $step, because $file was not found!"
                    unless -e $file;
                ${$list_ref} = defined ${$list_ref} ? join(",", ${$list_ref}, $file) : $file;
            }
        }

        # Files of the last finished step itself.
        foreach my $needed ("run$step.weights.txt",
                            "run$step.$mert_logfile",
                            "run$step.best$___N_BEST_LIST_SIZE.out.gz") {
            die "Can't start from step $step, because $needed was not found!"
                unless -e $needed;
        }
        print STDERR "All needed data are available\n";

        print STDERR "Loading information from last step ($step)\n";
        my %dummy; # sparse features
        ($bestpoint,$devbleu) = &get_weights_from_mert("run$step.$mert_outfile","run$step.$mert_logfile",scalar @{$featlist->{"names"}},\%dummy);
        die "Failed to parse mert.log, missed Best point there."
            if !defined $bestpoint || !defined $devbleu;
        print "($step) BEST at $step $bestpoint => $devbleu at ".`date`;
        my @newweights = split /\s+/, $bestpoint;

        # Sanity check: order of lambdas must match
        sanity_check_order_of_lambdas($featlist,
            "gunzip -c < run$step.best$___N_BEST_LIST_SIZE.out.gz |");

        # update my cache of lambda values
        $featlist->{"values"} = \@newweights;
    }
    else {
        print STDERR "No previous data are needed\n";
    }

    $start_run = $step + 1;
}
616
+
617
+ ###### MERT MAIN LOOP
618
+
619
+ my $run=$start_run-1;
620
+
621
+ my $oldallsorted = undef;
622
+ my $allsorted = undef;
623
+
624
+ my $nbest_file=undef;
625
+
626
+ while(1) {
627
+ $run++;
628
+ if ($maximum_iterations && $run > $maximum_iterations) {
629
+ print "Maximum number of iterations exceeded - stopping\n";
630
+ last;
631
+ }
632
+ # run beamdecoder with option to output nbestlists
633
+ # the end result should be (1) @NBEST_LIST, a list of lists; (2) @SCORE, a list of lists of lists
634
+
635
+ print "run $run start at ".`date`;
636
+
637
+ # In case something dies later, we might wish to have a copy
638
+ create_config($___CONFIG, "./run$run.moses.ini", $featlist, $run, (defined$devbleu?$devbleu:"--not-estimated--"),$sparse_weights_file);
639
+
640
+
641
+ # skip running the decoder if the user wanted
642
+ if (!$skip_decoder) {
643
+ print "($run) run decoder to produce n-best lists\n";
644
+ $nbest_file = run_decoder($featlist, $run, $need_to_normalize);
645
+ $need_to_normalize = 0;
646
+ safesystem("gzip -f $nbest_file") or die "Failed to gzip run*out";
647
+ $nbest_file = $nbest_file.".gz";
648
+ }
649
+ else {
650
+ $nbest_file="run$run.best$___N_BEST_LIST_SIZE.out.gz";
651
+ print "skipped decoder run $run\n";
652
+ $skip_decoder = 0;
653
+ $need_to_normalize = 0;
654
+ }
655
+
656
+ # extract score statistics and features from the nbest lists
657
+ print STDERR "Scoring the nbestlist.\n";
658
+
659
+ my $base_feature_file = "features.dat";
660
+ my $base_score_file = "scores.dat";
661
+ my $feature_file = "run$run.${base_feature_file}";
662
+ my $score_file = "run$run.${base_score_file}";
663
+
664
+ # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
665
+ my $cmd = "";
666
+ my $scorer_name;
667
+ my $scorer_weight;
668
+ $scorer_config=~s/ //g;
669
+ my @lists_scorer_config=split(",",$scorer_config);
670
+ $mert_mert_args=$mert_mert_args." --sctype MERGE ";
671
+ my $scorer_config_spec;
672
+ foreach $scorer_config_spec(@lists_scorer_config)
673
+ {
674
+ # print STDERR $scorer_config_spec."\n";
675
+ my @lists_scorer_config_spec=split(":",$scorer_config_spec);
676
+ $scorer_name=$lists_scorer_config_spec[0];
677
+ $scorer_weight=$lists_scorer_config_spec[1];
678
+ # print STDERR $scorer_name."\n";
679
+ # print STDERR $scorer_weight."\n";
680
+ $cmd = "$mert_extract_cmd $mert_extract_args --scfile $score_file.$scorer_name --ffile $feature_file.$scorer_name --sctype $scorer_name -r ".join(",", @references)." -n $nbest_file";
681
+ # print STDERR "LANCEMENT $scorer_name ********************************************\n";
682
+ &submit_or_exec($cmd,"extract.out.$scorer_name","extract.err.$scorer_name");
683
+ # print STDERR "FIN $scorer_name ************************************************** \n";
684
+ # print STDERR "executing $cmd\n";
685
+
686
+ # print STDERR "\n";
687
+ # safesystem("date");
688
+ # print STDERR "\n";
689
+
690
+ # if (defined $___JOBS) {
691
+ # safesystem("$qsubwrapper $pass_old_sge -command='$cmd' -queue-parameter=\"$queue_flags\" -stdout=extract.out.$scorer_name -stderr=extract.err.$scorer_name" )
692
+ # or die "$scorer_name Failed to submit extraction to queue (via $qsubwrapper)";
693
+ # } else {
694
+ # safesystem("$cmd > extract.out.$scorer_name 2> extract.err.$scorer_name") or die "$scorer_name Failed to do extraction of statistics.";
695
+ # }
696
+
697
+ # print FILE "$scorer_name $scorer_weight $score_file.$scorer_name $feature_file.$scorer_name\n";
698
+ }
699
+ # print STDERR "CREATION INI\n";
700
+ my @scorer_content;
701
+ my @feature_content;
702
+ my $fileIncrement=0;
703
+ my $minSizeIncrement=-1;
704
+ open(FILE,">merge.init") || die ("File creation ERROR : merge.init");
705
+ foreach $scorer_config_spec(@lists_scorer_config)
706
+ {
707
+ my @lists_scorer_config_spec=split(":",$scorer_config_spec);
708
+ $scorer_name=$lists_scorer_config_spec[0];
709
+ $scorer_weight=$lists_scorer_config_spec[1];
710
+ print FILE "$scorer_name $scorer_weight $score_file.$scorer_name $feature_file.$scorer_name\n";
711
+ my @tmp_load_content=`/bin/cat $score_file.$scorer_name`;
712
+ my @tmp_load_feat_content=`/bin/cat $feature_file.$scorer_name`;
713
+ my @tmp_content;
714
+ my @tmp_feat_content;
715
+ my $contentIncrement=0;
716
+ my @tmp_part_content;
717
+ my $increment_part=0;
718
+ while ($contentIncrement<scalar(@tmp_load_feat_content))
719
+ {
720
+ my $line=$tmp_load_feat_content[$contentIncrement];
721
+ chomp($line);
722
+ $line=~s/^[ ]+//g;
723
+ $line=~s/[ ]+$//g;
724
+ $line=~s/[ ]+/ /g;
725
+ push @tmp_part_content,$line;
726
+ if (rindex($line,"FEATURES_TXT_END")>-1)
727
+ {
728
+ $tmp_feat_content[$increment_part] = [ @tmp_part_content ];
729
+ $increment_part++;
730
+ @tmp_part_content=();
731
+ }
732
+ $contentIncrement++;
733
+ }
734
+ $contentIncrement=0;
735
+ $increment_part=0;
736
+ @tmp_part_content=();
737
+ while ($contentIncrement<scalar(@tmp_load_content))
738
+ {
739
+ my $line=$tmp_load_content[$contentIncrement];
740
+ chomp($line);
741
+ $line=~s/^[ ]+//g;
742
+ $line=~s/[ ]+$//g;
743
+ $line=~s/[ ]+/ /g;
744
+ push @tmp_part_content,$line;
745
+ if (rindex($line,"SCORES_TXT_END")>-1)
746
+ {
747
+ $tmp_content[$increment_part] = [ @tmp_part_content ];
748
+ $increment_part++;
749
+ @tmp_part_content=();
750
+ }
751
+ $contentIncrement++;
752
+ }
753
+ if ($minSizeIncrement<0 || $minSizeIncrement>$increment_part)
754
+ {
755
+ $minSizeIncrement=$increment_part;
756
+ }
757
+ $scorer_content[$fileIncrement] = [ @tmp_content ];
758
+ $feature_content[$fileIncrement] = [ @tmp_feat_content ];
759
+ # if ($fileIncrement==0)
760
+ # {
761
+ # `/bin/cp $feature_file.$scorer_name $feature_file`;
762
+ # }
763
+ $fileIncrement++;
764
+ }
765
+ close(FILE);
766
+ # print STDERR "\n";
767
+ # safesystem("date");
768
+ # print STDERR "\n";
769
+
770
+ # print STDERR "ON VA RASSEMBLER dans $score_file\n";
771
+ open(SCOREFILE,">$score_file") || die ("File creation ERROR : $score_file");
772
+ open(FEATFILE,">$feature_file") || die ("File creation ERROR : $feature_file");
773
+ my $newFileIncrement=0;
774
+ my $contentIncrement=0;
775
+ my $maxContent=100;
776
+ my $increment_part=0;
777
+ my $contentSize=scalar(@{$scorer_content[0]});
778
+ # print STDERR "TAILLE : ".$contentSize."|".$fileIncrement."|".$minSizeIncrement."\n";
779
+ while ($increment_part<$minSizeIncrement)
780
+ {
781
+ $contentIncrement=0;
782
+ # print STDERR "increment_part : $increment_part\n";
783
+ while ($contentIncrement< $maxContent)
784
+ {
785
+ # print STDERR "contentIncrement : $contentIncrement\n";
786
+ my $line="";
787
+ my $featureLine="";
788
+ my $createLines=1;
789
+ $newFileIncrement=0;
790
+ while($newFileIncrement< $fileIncrement)
791
+ {
792
+ # print STDERR "newFileIncrement : $newFileIncrement\n";
793
+ if (rindex($scorer_content[$newFileIncrement][$increment_part][$contentIncrement],"BEGIN")<0)
794
+ {
795
+ if (rindex($line,"SCORES_TXT_END")>-1)
796
+ {
797
+ # $line=$line;
798
+ # chomp($line);
799
+ }
800
+ elsif (rindex($scorer_content[$newFileIncrement][$increment_part][$contentIncrement],"SCORES_TXT_END")>-1)
801
+ {
802
+ $line=$scorer_content[$newFileIncrement][$increment_part][$contentIncrement];
803
+ $featureLine=$feature_content[$newFileIncrement][$increment_part][$contentIncrement];
804
+ }
805
+ else
806
+ {
807
+ $line=$line." ".$scorer_content[$newFileIncrement][$increment_part][$contentIncrement];
808
+ chomp($line);
809
+ if (length($featureLine)>0 && rindex($featureLine,$feature_content[$newFileIncrement][$increment_part][$contentIncrement])==0)
810
+ {
811
+
812
+ $featureLine=$feature_content[$newFileIncrement][$increment_part][$contentIncrement];
813
+ chomp($featureLine);
814
+ }
815
+ elsif (length($featureLine)>0)
816
+ {
817
+ # $createLines=0;
818
+ my @split_line=split(/[\s]+/,$featureLine);
819
+ my @split_line_input=split(/[\s]+/,$feature_content[$newFileIncrement][$increment_part][$contentIncrement]);
820
+ my $i=0;
821
+ $featureLine="";
822
+ for ($i=0;$i<scalar(@split_line_input);$i++)
823
+ {
824
+ $split_line_input[$i]=($split_line_input[$i]+$split_line[$i])/2;
825
+ $featureLine=$featureLine.$split_line_input[$i]." ";
826
+ }
827
+ }
828
+ elsif (length($featureLine)==0)
829
+ {
830
+ $featureLine=$feature_content[$newFileIncrement][$increment_part][$contentIncrement];
831
+ chomp($featureLine);
832
+ }
833
+ }
834
+ }
835
+ else
836
+ {
837
+ my @split_line_input=split(" ",$scorer_content[$newFileIncrement][$increment_part][$contentIncrement]);
838
+ my @split_line_feat_input=split(/[\s]+/,$feature_content[$newFileIncrement][$increment_part][$contentIncrement]);
839
+ my @split_line=split(" ",$line);
840
+ if (scalar(@split_line)>4)
841
+ {
842
+ $split_line_input[3]=$split_line[3]+$split_line_input[3];
843
+ }
844
+ if (scalar(@split_line_input)>4)
845
+ {
846
+ if (scalar(@split_line)>4)
847
+ {
848
+ if ($split_line[2]<$split_line_input[2])
849
+ {
850
+ $split_line_input[2]=$split_line[2];
851
+ }
852
+ }
853
+ else
854
+ {
855
+ ## Nothing to do
856
+ }
857
+ $maxContent=$split_line_input[2]+2;
858
+ # print STDERR "maxContent : $maxContent : ".$scorer_content[$newFileIncrement][$increment_part][$contentIncrement]."\n";
859
+ }
860
+ else
861
+ {
862
+ die "scoreFile bad format : ".$scorer_content[$newFileIncrement][$increment_part][$contentIncrement]."\n";
863
+ }
864
+ $line=$split_line_input[0]." ".$split_line_input[1]." ".$split_line_input[2]." ".$split_line_input[3]." MERGE";
865
+ my $i=0;
866
+ $featureLine="";
867
+ for ($i=0;$i<scalar(@split_line_feat_input);$i++)
868
+ {
869
+ # $split_line_feat_input[$i]=($split_line_input[$i]+$split_line[$i])/2;
870
+ if ($i==2)
871
+ {
872
+ $featureLine=$featureLine.$split_line_input[2]." ";
873
+ }
874
+ else
875
+ {
876
+ $featureLine=$featureLine.$split_line_feat_input[$i]." ";
877
+ }
878
+ }
879
+
880
+ # $featureLine=$feature_content[$newFileIncrement][$increment_part][$contentIncrement];
881
+ }
882
+ $newFileIncrement++;
883
+ }
884
+ $line=~s/^[ ]+//g;
885
+ $line=~s/[ ]+$//g;
886
+ $line=~s/[ ]+/ /g;
887
+ # $line=~s/( SCORES_TXT_END[^!]*)//g;
888
+ # print STDERR $line."\n";
889
+ # if ($createLines>0)
890
+ # {
891
+ print SCOREFILE $line."\n";
892
+ print FEATFILE $featureLine."\n";
893
+ # }
894
+ $contentIncrement++;
895
+ }
896
+ $increment_part++;
897
+ }
898
+ close(SCOREFILE);
899
+ close(FEATFILE);
900
+ # `/bin/cp `
901
+
902
+ # $cmd="$mertdir/mergeWeights -c merge.init -s $score_file -f $feature_file";
903
+ # print STDERR "executing : $cmd\n";
904
+
905
+ # if (defined $___JOBS) {
906
+ # safesystem("$qsubwrapper $pass_old_sge -command='$cmd' -queue-parameter=\"$queue_flags\" -stdout=mergeWeight.out.MERGE -stderr=mergeWeight.err.MERGE" )
907
+ # or die "MERGE Failed to submit extraction to queue (via $qsubwrapper)";
908
+ # } else {
909
+ # safesystem("$cmd > mergeWeight.out.MERGE 2> mergeWeight.err.MERGE") or die "MERGE Failed to do extraction of statistics.";
910
+ # }
911
+
912
+ # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
913
+
914
+ # my $cmd = "$mert_extract_cmd $mert_extract_args --scfile $score_file --ffile $feature_file -r ".join(",", @references)." -n $nbest_file";
915
+ # &submit_or_exec($cmd,"extract.out","extract.err");
916
+
917
+ # Create the initial weights file for mert: init.opt
918
+
919
+ my @MIN = @{$featlist->{"mins"}};
920
+ my @MAX = @{$featlist->{"maxs"}};
921
+ my @CURR = @{$featlist->{"values"}};
922
+ my @NAME = @{$featlist->{"names"}};
923
+
924
+ open(OUT,"> $weights_in_file")
925
+ or die "Can't write $weights_in_file (WD now $___WORKING_DIR)";
926
+ print OUT join(" ", @CURR)."\n";
927
+ print OUT join(" ", @MIN)."\n"; # this is where we could pass MINS
928
+ print OUT join(" ", @MAX)."\n"; # this is where we could pass MAXS
929
+ close(OUT);
930
+ # print join(" ", @NAME)."\n";
931
+
932
+ # make a backup copy labelled with this run number
933
+ safesystem("\\cp -f $weights_in_file run$run.$weights_in_file") or die;
934
+
935
+ my $DIM = scalar(@CURR); # number of lambdas
936
+
937
+ # run mert
938
+ $cmd = "$mert_mert_cmd -d $DIM $mert_mert_args";
939
+
940
+ my $mert_settings = " -n $___RANDOM_RESTARTS";
941
+ if ($___PREDICTABLE_SEEDS) {
942
+ my $seed = $run * 1000;
943
+ $mert_settings .= " -r $seed";
944
+ }
945
+ if ($___RANDOM_DIRECTIONS) {
946
+ if ($___NUM_RANDOM_DIRECTIONS == 0) {
947
+ $mert_settings .= " -m 50";
948
+ }
949
+ $mert_settings .= " -t random-direction";
950
+ }
951
+ if ($___NUM_RANDOM_DIRECTIONS) {
952
+ $mert_settings .= " -m $___NUM_RANDOM_DIRECTIONS";
953
+ }
954
+ if ($__THREADS) {
955
+ $mert_settings .= " --threads $__THREADS";
956
+ }
957
+
958
+ my $file_settings = "";
959
+ if (defined $prev_feature_file) {
960
+ $file_settings .= " --ffile $prev_feature_file,$feature_file";
961
+ }
962
+ else{
963
+ $file_settings .= " --ffile $feature_file";
964
+ }
965
+ if (defined $prev_score_file) {
966
+ $file_settings .= " --scfile $prev_score_file,$score_file";
967
+ }
968
+ else{
969
+ $file_settings .= " --scfile $score_file";
970
+ }
971
+ if ($___START_WITH_HISTORIC_BESTS && defined $prev_init_file) {
972
+ $file_settings .= " --ifile $prev_init_file,run$run.$weights_in_file";
973
+ }
974
+ else{
975
+ $file_settings .= " --ifile run$run.$weights_in_file";
976
+ }
977
+
978
+ $cmd .= $file_settings;
979
+
980
+ # pro optimization
981
+ if ($___PAIRWISE_RANKED_OPTIMIZER) {
982
+ $cmd .= " --pro run$run.pro.data ; echo 'not used' > $weights_out_file; $pro_optimizer -fvals -maxi 30 -nobias binary run$run.pro.data";
983
+ &submit_or_exec($cmd,$mert_outfile,$mert_logfile);
984
+ }
985
+ # first pro, then mert
986
+ elsif ($___PRO_STARTING_POINT) {
987
+ # run pro...
988
+ my $pro_cmd = $cmd." --pro run$run.pro.data ; $pro_optimizer -fvals -maxi 30 -nobias binary run$run.pro.data";
989
+ &submit_or_exec($pro_cmd,"run$run.pro.out","run$run.pro.err");
990
+ # ... get results ...
991
+ my %dummy;
992
+ ($bestpoint,$devbleu) = &get_weights_from_mert("run$run.pro.out","run$run.pro.err",scalar @{$featlist->{"names"}},\%dummy);
993
+ open(PRO_START,">run$run.init.pro");
994
+ print PRO_START $bestpoint."\n";
995
+ close(PRO_START);
996
+ # ... and run mert
997
+ $cmd =~ s/(--ifile \S+)/$1,run$run.init.pro/;
998
+ &submit_or_exec($cmd.$mert_settings,$mert_outfile,$mert_logfile);
999
+ }
1000
+ # just mert
1001
+ else {
1002
+ &submit_or_exec($cmd.$mert_settings,$mert_outfile,$mert_logfile);
1003
+ }
1004
+
1005
+ die "Optimization failed, file $weights_out_file does not exist or is empty"
1006
+ if ! -s $weights_out_file;
1007
+
1008
+ # backup copies
1009
+ foreach my $extractFiles(`/bin/ls extract.*`)
1010
+ {
1011
+ chomp $extractFiles;
1012
+ safesystem ("\\cp -f $extractFiles run$run.$extractFiles") or die;
1013
+ }
1014
+
1015
+ # safesystem ("\\cp -f extract.err run$run.extract.err") or die;
1016
+ # safesystem ("\\cp -f extract.out run$run.extract.out") or die;
1017
+ safesystem ("\\cp -f $mert_outfile run$run.$mert_outfile") or die;
1018
+ safesystem ("\\cp -f $mert_logfile run$run.$mert_logfile") or die;
1019
+ safesystem ("touch $mert_logfile run$run.$mert_logfile") or die;
1020
+ safesystem ("\\cp -f $weights_out_file run$run.$weights_out_file") or die; # this one is needed for restarts, too
1021
+
1022
+ print "run $run end at ".`date`;
1023
+
1024
+ my %sparse_weights; # sparse features
1025
+ ($bestpoint,$devbleu) = &get_weights_from_mert("run$run.$mert_outfile","run$run.$mert_logfile",scalar @{$featlist->{"names"}},\%sparse_weights);
1026
+
1027
+ die "Failed to parse mert.log, missed Best point there."
1028
+ if !defined $bestpoint || !defined $devbleu;
1029
+
1030
+ print "($run) BEST at $run: $bestpoint => $devbleu at ".`date`;
1031
+
1032
+ # update my cache of lambda values
1033
+ my @newweights = split /\s+/, $bestpoint;
1034
+
1035
+ # interpolate with the previous iteration's weights, if historic-interpolation is specified
1036
+ if ($___HISTORIC_INTERPOLATION>0 && $run>3) {
1037
+ my %historic_sparse_weights;
1038
+ if (-e "run$run.sparse-weights") {
1039
+ open(SPARSE,"run$run.sparse-weights");
1040
+ while(<SPARSE>) {
1041
+ chop;
1042
+ my ($feature,$weight) = split;
1043
+ $historic_sparse_weights{$feature} = $weight;
1044
+ }
1045
+ }
1046
+ my $prev = $run-1;
1047
+ my @historic_weights = split /\s+/, `cat run$prev.$weights_out_file`;
1048
+ for(my $i=0;$i<scalar(@newweights);$i++) {
1049
+ $newweights[$i] = $___HISTORIC_INTERPOLATION * $newweights[$i] + (1-$___HISTORIC_INTERPOLATION) * $historic_weights[$i];
1050
+ }
1051
+ print "interpolate with ".join(",",@historic_weights)." to ".join(",",@newweights);
1052
+ foreach (keys %sparse_weights) {
1053
+ $sparse_weights{$_} *= $___HISTORIC_INTERPOLATION;
1054
+ #print STDERR "sparse_weights{$_} *= $___HISTORIC_INTERPOLATION -> $sparse_weights{$_}\n";
1055
+ }
1056
+ foreach (keys %historic_sparse_weights) {
1057
+ $sparse_weights{$_} += (1-$___HISTORIC_INTERPOLATION) * $historic_sparse_weights{$_};
1058
+ #print STDERR "sparse_weights{$_} += (1-$___HISTORIC_INTERPOLATION) * $historic_sparse_weights{$_} -> $sparse_weights{$_}\n";
1059
+ }
1060
+ }
1061
+ if ($___HISTORIC_INTERPOLATION>0) {
1062
+ open(WEIGHTS,">run$run.$weights_out_file");
1063
+ print WEIGHTS join(" ",@newweights);
1064
+ close(WEIGHTS);
1065
+ }
1066
+
1067
+ $featlist->{"values"} = \@newweights;
1068
+
1069
+ if (scalar keys %sparse_weights) {
1070
+ $sparse_weights_file = "run".($run+1).".sparse-weights";
1071
+ open(SPARSE,">".$sparse_weights_file);
1072
+ foreach my $feature (keys %sparse_weights) {
1073
+ print SPARSE "$feature $sparse_weights{$feature}\n";
1074
+ }
1075
+ close(SPARSE);
1076
+ }
1077
+
1078
+ ## additional stopping criterion: weights have not changed
1079
+ my $shouldstop = 1;
1080
+ for(my $i=0; $i<@CURR; $i++) {
1081
+ die "Lost weight! mert reported fewer weights (@newweights) than we gave it (@CURR)"
1082
+ if !defined $newweights[$i];
1083
+ if (abs($CURR[$i] - $newweights[$i]) >= $minimum_required_change_in_weights) {
1084
+ $shouldstop = 0;
1085
+ last;
1086
+ }
1087
+ }
1088
+
1089
+ open F, "> finished_step.txt" or die "Can't mark finished step";
1090
+ print F $run."\n";
1091
+ close F;
1092
+
1093
+ if ($shouldstop) {
1094
+ print STDERR "None of the weights changed more than $minimum_required_change_in_weights. Stopping.\n";
1095
+ last;
1096
+ }
1097
+
1098
+ my $firstrun;
1099
+ if ($prev_aggregate_nbl_size==-1){
1100
+ $firstrun=1;
1101
+ }
1102
+ else{
1103
+ $firstrun=$run-$prev_aggregate_nbl_size+1;
1104
+ $firstrun=($firstrun>0)?$firstrun:1;
1105
+ }
1106
+ print "loading data from $firstrun to $run (prev_aggregate_nbl_size=$prev_aggregate_nbl_size)\n";
1107
+ $prev_feature_file = undef;
1108
+ $prev_score_file = undef;
1109
+ $prev_init_file = undef;
1110
+ for (my $i=$firstrun;$i<=$run;$i++){
1111
+ if (defined $prev_feature_file){
1112
+ $prev_feature_file = "${prev_feature_file},run${i}.${base_feature_file}";
1113
+ }
1114
+ else{
1115
+ $prev_feature_file = "run${i}.${base_feature_file}";
1116
+ }
1117
+ if (defined $prev_score_file){
1118
+ $prev_score_file = "${prev_score_file},run${i}.${base_score_file}";
1119
+ }
1120
+ else{
1121
+ $prev_score_file = "run${i}.${base_score_file}";
1122
+ }
1123
+ if (defined $prev_init_file){
1124
+ $prev_init_file = "${prev_init_file},run${i}.${weights_in_file}";
1125
+ }
1126
+ else{
1127
+ $prev_init_file = "run${i}.${weights_in_file}";
1128
+ }
1129
+ }
1130
+ print "loading data from $prev_feature_file\n" if defined($prev_feature_file);
1131
+ print "loading data from $prev_score_file\n" if defined($prev_score_file);
1132
+ print "loading data from $prev_init_file\n" if defined($prev_init_file);
1133
+ }
1134
+ print "Training finished at ".`date`;
1135
+
1136
+ if (defined $allsorted){ safesystem ("\\rm -f $allsorted") or die; };
1137
+
1138
+ safesystem("\\cp -f $weights_in_file run$run.$weights_in_file") or die;
1139
+ safesystem("\\cp -f $mert_logfile run$run.$mert_logfile") or die;
1140
+
1141
+ create_config($___CONFIG_ORIG, "./moses.ini", $featlist, $run, $devbleu);
1142
+
1143
+ # just to be sure that we have the really last finished step marked
1144
+ open F, "> finished_step.txt" or die "Can't mark finished step";
1145
+ print F $run."\n";
1146
+ close F;
1147
+
1148
+
1149
+ #chdir back to the original directory # useless, just to remind we were not there
1150
+ chdir($cwd);
1151
+
1152
+ } # end of local scope
1153
+
1154
# Read the optimized weights (and the dev score, when available) written by the optimizer.
#
# Arguments:
#   $outfile        - file holding the optimizer's standard output
#   $logfile        - file holding the optimizer's log output
#   $weight_count   - number of dense feature weights
#   $sparse_weights - hash reference that receives sparse feature weights
#
# PRO mode (pairwise ranked optimizer, or a PRO starting-point run whose log file
# name contains "pro"): dense weights are read from "F<index> <value>" lines of
# $outfile, sparse weights from "<name containing '_'> <value>" lines; all of them
# are divided by the sum of absolute dense weights and the score is "unknown".
# Otherwise the first "Best point: <weights> => <score>" line of $logfile is used.
#
# Returns ($bestpoint, $devbleu); both are undef when no "Best point" line exists.
# Dies if a file cannot be opened or if PRO produced no non-zero dense weight.
sub get_weights_from_mert {
  my ($outfile,$logfile,$weight_count,$sparse_weights) = @_;
  my ($bestpoint,$devbleu);
  if ($___PAIRWISE_RANKED_OPTIMIZER || ($___PRO_STARTING_POINT && $logfile =~ /pro/)) {
    open(my $in, "<", $outfile) or die "Can't open $outfile";
    my @WEIGHT = (0) x $weight_count;
    my $sum = 0;
    while(<$in>) {
      # regular features ('+' is accepted so that exponents like 1e+05 parse fully)
      if (/^F(\d+) ([\-\+\.\de]+)/) {
        $WEIGHT[$1] = $2;
        $sum += abs($2);
      }
      # sparse features
      elsif(/^(.+_.+) ([\-\+\.\de]+)/) {
        $$sparse_weights{$1} = $2;
      }
    }
    close $in;
    # Normalizing by zero would otherwise abort with a bare "Illegal division by zero".
    die "No non-zero feature weights found in $outfile, cannot normalize"
      if $sum == 0;
    $devbleu = "unknown";
    foreach (@WEIGHT) { $_ /= $sum; }
    foreach (keys %{$sparse_weights}) { $$sparse_weights{$_} /= $sum; }
    $bestpoint = join(" ",@WEIGHT);
  }
  else {
    open(my $in, "<", $logfile) or die "Can't open $logfile";
    while (<$in>) {
      if (/Best point:\s*([\s\d\.\-\+e]+?)\s*=> ([\-\+\d\.e]+)/) {
        $bestpoint = $1;
        $devbleu = $2;
        last;
      }
    }
    close $in;
  }
  return ($bestpoint,$devbleu);
}
1191
+
1192
# Decode the dev set with the weights in $featlist and return the name of the
# n-best list file ("run<run>.best<N>.out").
#
# Arguments:
#   $featlist          - hash ref with parallel "names" and "values" arrays
#   $run               - iteration number, used in the output file names
#   $need_to_normalize - when true, weights are divided by the sum of their absolute values
#
# Dies if the decoder command fails or if the n-best list reports its scores in
# an order different from $featlist->{"names"}.
sub run_decoder {
  my ($featlist, $run, $need_to_normalize) = @_;
  my $filename = sprintf("run%d.best$___N_BEST_LIST_SIZE.out", $run);

  # user-supplied parameters
  print "params = $___DECODER_FLAGS\n";

  # parameters to set all model weights (to override moses.ini)
  my @vals = @{$featlist->{"values"}};
  if ($need_to_normalize) {
    print STDERR "Normalizing lambdas: @vals\n";
    my $totlambda = 0;
    $totlambda += abs($_) foreach @vals;
    $_ /= $totlambda foreach @vals;
  }

  # moses does not seem to accept "-tm X -tm Y" but needs "-tm X Y",
  # so all values sharing a feature name are collected into one switch
  my %model_weights;
  my @names = @{$featlist->{"names"}};
  foreach my $idx (0 .. $#names) {
    my $name = $names[$idx];
    $model_weights{$name} = "-$name" unless defined $model_weights{$name};
    $model_weights{$name} .= sprintf " %.6f", $vals[$idx];
  }
  my $decoder_config = join(" ", values %model_weights);
  print STDERR "DECODER_CFG = $decoder_config\n";
  print "decoder_config = $decoder_config\n";

  # run the decoder, through the parallel wrapper when jobs were requested
  my $run_parallel = defined $___JOBS && $___JOBS > 0;
  my $decoder_cmd = $run_parallel
    ? "$moses_parallel_cmd $pass_old_sge -config $___CONFIG -inputtype $___INPUTTYPE -qsub-prefix mert$run -queue-parameters \"$queue_flags\" -decoder-parameters \"$___DECODER_FLAGS $decoder_config\" -n-best-list \"$filename $___N_BEST_LIST_SIZE\" -input-file $___DEV_F -jobs $___JOBS -decoder $___DECODER > run$run.out"
    : "$___DECODER $___DECODER_FLAGS -config $___CONFIG -inputtype $___INPUTTYPE $decoder_config -n-best-list $filename $___N_BEST_LIST_SIZE -input-file $___DEV_F > run$run.out";

  safesystem($decoder_cmd) or die "The decoder died. CONFIG WAS $decoder_config \n";

  sanity_check_order_of_lambdas($featlist, $filename);
  return $filename;
}
1234
+
1235
+
1236
# Attach per-weight search ranges to a feature list.
#
# Arguments:
#   $featlist - hash ref with a "names" array; "mins" and "maxs" arrays are filled in
#   $ranges   - array ref of --range option strings (may be undef), each of the
#               form "name:min..max[,min..max...][,name2:min..max...]"
#
# Ranges given for a name are consumed in order by successive weights carrying
# that name; weights without an explicit range get [0.0, 1.0].
#
# Returns $featlist. Dies on an unknown name, a missing name, or a malformed bound.
sub insert_ranges_to_featlist {
  my $featlist = shift;
  my $ranges = shift;

  $ranges = [] if !defined $ranges;

  # first collect the ranges from options
  my %niceranges;
  foreach my $range (@$ranges) {
    my $name = undef;
    foreach my $namedpair (split /,/, $range) {
      if ($namedpair =~ /^(.*?):/) {
        $name = $1;
        $namedpair =~ s/^.*?://;
        die "Unrecognized name '$name' in --range=$range"
          if !defined $ABBR2FULL{$name};
      }
      my ($min, $max) = split /\.\./, $namedpair;
      # a pair without ".." leaves a bound undefined; report it as an empty bound
      $min = "" if !defined $min;
      $max = "" if !defined $max;
      die "Bad min '$min' in --range=$range" if $min !~ /^-?[0-9.]+$/;
      # the upper bound itself must be validated here (not the lower bound again)
      die "Bad max '$max' in --range=$range" if $max !~ /^-?[0-9.]+$/;
      die "No name given in --range=$range" if !defined $name;
      push @{$niceranges{$name}}, [$min, $max];
    }
  }

  # now populate featlist
  for(my $i=0; $i<scalar(@{$featlist->{"names"}}); $i++) {
    my $name = $featlist->{"names"}->[$i];
    my $min = 0.0;
    my $max = 1.0;
    if (defined $niceranges{$name}) {
      my $minmax = shift @{$niceranges{$name}};
      ($min, $max) = @$minmax if defined $minmax;
    }
    $featlist->{"mins"}->[$i] = $min;
    $featlist->{"maxs"}->[$i] = $max;
  }
  return $featlist;
}
1277
+
1278
# Die unless the score labels found in the n-best list appear in exactly the
# order of $featlist->{"names"}.
#
# Arguments:
#   $featlist           - hash ref with a "names" array
#   $filename_or_stream - n-best list handed to get_order_of_scores_from_nbestlist
sub sanity_check_order_of_lambdas {
  my ($featlist, $filename_or_stream) = @_;

  my @expected_lambdas = @{$featlist->{"names"}};
  my @got = get_order_of_scores_from_nbestlist($filename_or_stream);
  unless ("@got" eq "@expected_lambdas") {
    die "Mismatched lambdas. Decoder returned @got, we expected @expected_lambdas";
  }
}
1287
+
1288
+
1289
# Obtain the list of features and their initial values.
#
# Argument:
#   $configfn - decoder configuration file
#
# If ./features.list already exists it is reused; otherwise the decoder is run
# with -show-weights and its output is stored there. Each line must look like
# "<long name> <feature> <value>"; lines whose value is "sparse" are skipped.
#
# Returns a hash ref {"names" => [...], "values" => [...]}.
# Dies on an unparsable line; exits with status 1 after listing every bad
# initial value or feature missing from %ABBR2FULL.
sub get_featlist_from_moses {
  my $configfn = shift;
  my $featlistfn = "./features.list";
  if (-e $featlistfn) {
    print STDERR "Using cached features list: $featlistfn\n";
  } else {
    # report the config that is actually passed to the decoder below
    print STDERR "Asking moses for feature names and values from $configfn\n";
    my $cmd = "$___DECODER $___DECODER_FLAGS -config $configfn -inputtype $___INPUTTYPE -show-weights > $featlistfn";
    safesystem($cmd) or die "Failed to run moses with the config $configfn";
  }

  # read feature list
  my @names = ();
  my @startvalues = ();
  open(my $ini, "<", $featlistfn) or die "Can't read $featlistfn";
  my $nr = 0;
  my @errs = ();
  while (<$ini>) {
    $nr++;
    chomp;
    /^(.+) (\S+) (\S+)$/ || die("invalid feature: $_");
    my ($longname, $feature, $value) = ($1,$2,$3);
    next if $value eq "sparse";
    # accept plain decimals and scientific notation with a signed exponent (e.g. 1e-05)
    push @errs, "$featlistfn:$nr:Bad initial value of $feature: $value\n"
      if $value !~ /^[+-]?(?:[0-9]+\.?[0-9]*|\.[0-9]+)(?:e[+-]?[0-9]+)?$/i;
    push @errs, "$featlistfn:$nr:Unknown feature '$feature', please add it to \@ABBR_FULL_MAP\n"
      if !defined $ABBR2FULL{$feature};
    push @names, $feature;
    push @startvalues, $value;
  }
  close $ini;
  if (scalar @errs) {
    print STDERR join("", @errs);
    exit 1;
  }
  return {"names"=>\@names, "values"=>\@startvalues};
}
1328
+
1329
+
1330
# Peek at the first line of an n-best list and return the dense score labels in
# the order they occur, one entry per score value.
#
# The third "|||"-separated column is expected to look like
#   label: num num num label2: num
# A token containing '_' before its ':' marks a sparse feature; the single
# number following it is ignored.
#
# Argument:
#   $fname_or_source - passed to two-argument open, so anything that form accepts
#
# Dies if the source cannot be opened, is empty, has no scores, contains a
# score before any label, or contains an unrecognised token.
sub get_order_of_scores_from_nbestlist {
  my ($fname_or_source) = @_;
  open(my $nbest, $fname_or_source) or die "Failed to get order of scores from nbestlist '$fname_or_source'";
  my $line = <$nbest>;
  close $nbest;
  die "Line empty in nbestlist '$fname_or_source'" if !defined $line;

  # columns: sentence id ||| hypothesis ||| scores ||| total
  my (undef, undef, $scores) = split /\|\|\|/, $line;
  $scores =~ s/^\s*|\s*$//g;
  die "No scores in line: $line" if $scores eq "";

  my @order = ();
  my $label = undef;
  my $sparse = 0; # we ignore sparse features here
  foreach my $tok (split /\s+/, $scores) {
    if ($tok =~ /.+_.+:/) {
      $sparse = 1;
      next;
    }
    if ($tok =~ /^([a-z][0-9a-z]*):/i) {
      $label = $1;
      next;
    }
    unless ($tok =~ /^-?[-0-9.e]+$/) {
      die "Not a label, not a score '$tok'. Failed to parse the scores string: '$scores' of nbestlist '$fname_or_source'";
    }
    unless ($sparse) {
      # a dense score: record the label it belongs to
      die "Found a score but no label before it! Bad nbestlist '$fname_or_source'!"
        if !defined $label;
      push @order, $label;
    }
    $sparse = 0;
  }
  print STDERR "The decoder returns the scores in this order: @order\n";
  return @order;
}
1366
+
1367
# Write a new decoder config by cloning $infn and overriding selected sections.
#
# Arguments:
#   $infn                - source config
#   $outfn               - where to save the config
#   $featlist            - hash ref with "names"/"values"; the weights to write
#   $iteration           - only written into the header comment
#   $bleu_achieved       - only written into the header comment
#   $sparse_weights_file - optional; when defined, emitted as [weights-file]
#                          (prefixed with the working directory)
#
# Overrides come from $___DECODER_FLAGS ("-name value ..." pairs, short names
# mapped through %ABBR2FULL) and from the weights in $featlist, which replace
# any weights given in the flags. Sections present in the source config are
# rewritten in place (their comment and blank lines are kept); overrides for
# sections not present are appended at the end. Note that $___DECODER_FLAGS is
# whitespace-normalized in place.
#
# Dies if a file cannot be opened or a flag value has no preceding -name.
sub create_config {
  my $infn = shift; # source config
  my $outfn = shift; # where to save the config
  my $featlist = shift; # the lambdas we should write
  my $iteration = shift; # just for verbosity
  my $bleu_achieved = shift; # just for verbosity
  my $sparse_weights_file = shift; # only defined when optimizing sparse features

  my %P; # the hash of all parameters we wish to override

  # first convert the command line parameters to the hash
  { # ensure local scope of vars
    my $parameter=undef;
    print "Parsing --decoder-flags: |$___DECODER_FLAGS|\n";
    # /g matters: without it only the first alternative/occurrence is replaced,
    # leaving trailing blanks and double spaces that split into empty tokens
    $___DECODER_FLAGS =~ s/^\s+|\s+$//g;
    $___DECODER_FLAGS =~ s/\s+/ /g;
    foreach (split(/ /,$___DECODER_FLAGS)) {
      if (/^\-([^\d].*)$/) {
        $parameter = $1;
        $parameter = $ABBR2FULL{$parameter} if defined($ABBR2FULL{$parameter});
      }
      else {
        die "Found value with no -paramname before it: $_"
          if !defined $parameter;
        push @{$P{$parameter}},$_;
      }
    }
  }

  # First delete all weights params from the input, we're overwriting them.
  # Delete both short and long-named version.
  foreach my $name (@{$featlist->{"names"}}) {
    delete($P{$name});
    delete($P{$ABBR2FULL{$name}}) if defined $ABBR2FULL{$name};
  }

  # Convert weights to elements in P
  for(my $i=0; $i<scalar(@{$featlist->{"names"}}); $i++) {
    my $name = $featlist->{"names"}->[$i];
    my $val = $featlist->{"values"}->[$i];
    # ensure long name
    $name = defined $ABBR2FULL{$name} ? $ABBR2FULL{$name} : $name;
    push @{$P{$name}}, $val;
  }

  if (defined($sparse_weights_file)) {
    push @{$P{"weights-file"}}, $___WORKING_DIR."/".$sparse_weights_file;
  }

  # create new moses.ini decoder config file by cloning and overriding the original one
  open(my $ini, "<", $infn) or die "Can't read $infn";
  delete($P{"config"}); # never output
  print "Saving new config to: $outfn\n";
  open(my $out, ">", $outfn) or die "Can't write $outfn";
  print {$out} "# MERT optimized configuration\n";
  print {$out} "# decoder $___DECODER\n";
  print {$out} "# BLEU $bleu_achieved on dev $___DEV_F\n";
  print {$out} "# We were before running iteration $iteration\n";
  print {$out} "# finished ".`date`;

  my $line = <$ini>;
  while (defined $line) {

    # skip until hit [parameter]; keep comments and blank lines on the way.
    # The current line is written before the next one is read, so a comment
    # on the very first line of the source config is preserved as well.
    if ($line !~ /^\[(.+)\]\s*$/) {
      print {$out} $line if $line =~ /^\#/ || $line =~ /^\s+$/;
      $line = <$ini>;
      next;
    }

    # parameter name
    my $parameter = $1;
    $parameter = $ABBR2FULL{$parameter} if defined($ABBR2FULL{$parameter});
    print {$out} "[$parameter]\n";

    # change parameter, if new values
    if (defined($P{$parameter})) {
      # write new values
      print {$out} $_."\n" foreach @{$P{$parameter}};
      delete($P{$parameter});
      # skip until new parameter, only write comments
      while (defined($line = <$ini>)) {
        last if $line =~ /^\[/;
        print {$out} $line if $line =~ /^\#/ || $line =~ /^\s+$/;
      }
      next;
    }

    # unchanged parameter, write old
    while (defined($line = <$ini>)) {
      last if $line =~ /^\[/;
      print {$out} $line;
    }
  }

  # write all additional parameters (sorted, so the output is reproducible)
  foreach my $parameter (sort keys %P) {
    print {$out} "\n[$parameter]\n";
    print {$out} $_."\n" foreach @{$P{$parameter}};
  }

  close($ini);
  close($out);
  print STDERR "Saved: $outfn\n";
}
1478
+
1479
# Run a command through system() and report how it ended.
#
# Returns true when the command exited with status 0 and false for any other
# exit status (which is also printed to STDERR). If the command could not be
# started or was killed by a signal, the whole script exits with status 1.
sub safesystem {
  print STDERR "Executing: @_\n";
  system(@_);
  my $status = $?;

  if ($status == -1) {
    print STDERR "Failed to execute: @_\n $!\n";
    exit(1);
  }

  my $signal = $status & 127;
  if ($signal) {
    printf STDERR "Execution of: @_\n died with signal %d, %s coredump\n",
      $signal, ($status & 128) ? 'with' : 'without';
    exit(1);
  }

  my $exitcode = $status >> 8;
  print STDERR "Exit code: $exitcode\n" if $exitcode;
  return ! $exitcode;
}
1497
# Turn a path into an absolute one.
#
# The first "/nfsmnt" component is stripped. A path that is then already
# absolute is returned as is. Otherwise it is appended to the current
# directory (taken from `pawd`, falling back to `pwd`) and tidied up: "/./"
# and repeated slashes are collapsed, "dir/.." pairs are resolved (at most
# ten passes) and trailing slashes are removed.
sub ensure_full_path {
  my ($PATH) = @_;
  $PATH =~ s/\/nfsmnt//;
  if ($PATH =~ /^\//) {
    return $PATH;
  }

  my $dir = `pawd 2>/dev/null`;
  $dir = `pwd` unless $dir;
  chomp($dir);

  $PATH = $dir."/".$PATH;
  $PATH =~ s/[\r\n]//g;
  $PATH =~ s/\/\.\//\//g;
  $PATH =~ s/\/+/\//g;

  # resolve "x/../" pairs; the pass counter bounds the loop
  for (my $pass = 0; $PATH =~ /\/\.\.\// && $pass < 10; $pass++) {
    $PATH =~ s/\/+/\//g;
    $PATH =~ s/\/[^\/]+\/\.\.\//\//g;
  }

  $PATH =~ s/\/[^\/]+\/\.\.$//;
  $PATH =~ s/\/+$//;
  $PATH =~ s/\/nfsmnt//;
  return $PATH;
}
1518
+
1519
# Run $cmd with its output redirected to $stdout / $stderr.
#
# When $___JOBS is defined and positive the command is handed to $qsubwrapper;
# otherwise it is executed directly through the shell. Dies if the command
# (or its submission) fails.
sub submit_or_exec {
  my ($cmd,$stdout,$stderr) = @_;
  print STDERR "exec: $cmd\n";
  my $use_queue = defined $___JOBS && $___JOBS > 0;
  if (!$use_queue) {
    safesystem("$cmd > $stdout 2> $stderr") or die "ERROR: Failed to run '$cmd'.";
  }
  else {
    safesystem("$qsubwrapper $pass_old_sge -command='$cmd' -queue-parameter=\"$queue_flags\" -stdout=$stdout -stderr=$stderr" )
      or die "ERROR: Failed to submit '$cmd' (via $qsubwrapper)";
  }
}
mosesdecoder/contrib/relent-filter/src/IOWrapper.cpp ADDED
@@ -0,0 +1,580 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (c) 2006 University of Edinburgh
6
+ All rights reserved.
7
+
8
+ Redistribution and use in source and binary forms, with or without modification,
9
+ are permitted provided that the following conditions are met:
10
+
11
+ * Redistributions of source code must retain the above copyright notice,
12
+ this list of conditions and the following disclaimer.
13
+ * Redistributions in binary form must reproduce the above copyright notice,
14
+ this list of conditions and the following disclaimer in the documentation
15
+ and/or other materials provided with the distribution.
16
+ * Neither the name of the University of Edinburgh nor the names of its contributors
17
+ may be used to endorse or promote products derived from this software
18
+ without specific prior written permission.
19
+
20
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
22
+ THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23
+ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
24
+ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
28
+ IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30
+ POSSIBILITY OF SUCH DAMAGE.
31
+ ***********************************************************************/
32
+
33
+ // example file on how to use moses library
34
+
35
+ #include <iostream>
36
+ #include <stack>
37
+ #include "TypeDef.h"
38
+ #include "Util.h"
39
+ #include "IOWrapper.h"
40
+ #include "Hypothesis.h"
41
+ #include "WordsRange.h"
42
+ #include "TrellisPathList.h"
43
+ #include "StaticData.h"
44
+ #include "DummyScoreProducers.h"
45
+ #include "InputFileStream.h"
46
+
47
+ using namespace std;
48
+ using namespace Moses;
49
+
50
+ namespace MosesCmd
51
+ {
52
+
53
+ IOWrapper::IOWrapper(
54
+ const vector<FactorType> &inputFactorOrder
55
+ , const vector<FactorType> &outputFactorOrder
56
+ , const FactorMask &inputFactorUsed
57
+ , size_t nBestSize
58
+ , const string &nBestFilePath)
59
+ :m_inputFactorOrder(inputFactorOrder)
60
+ ,m_outputFactorOrder(outputFactorOrder)
61
+ ,m_inputFactorUsed(inputFactorUsed)
62
+ ,m_inputFile(NULL)
63
+ ,m_inputStream(&std::cin)
64
+ ,m_nBestStream(NULL)
65
+ ,m_outputWordGraphStream(NULL)
66
+ ,m_outputSearchGraphStream(NULL)
67
+ ,m_detailedTranslationReportingStream(NULL)
68
+ ,m_alignmentOutputStream(NULL)
69
+ {
70
+ Initialization(inputFactorOrder, outputFactorOrder
71
+ , inputFactorUsed
72
+ , nBestSize, nBestFilePath);
73
+ }
74
+
75
+ IOWrapper::IOWrapper(const std::vector<FactorType> &inputFactorOrder
76
+ , const std::vector<FactorType> &outputFactorOrder
77
+ , const FactorMask &inputFactorUsed
78
+ , size_t nBestSize
79
+ , const std::string &nBestFilePath
80
+ , const std::string &inputFilePath)
81
+ :m_inputFactorOrder(inputFactorOrder)
82
+ ,m_outputFactorOrder(outputFactorOrder)
83
+ ,m_inputFactorUsed(inputFactorUsed)
84
+ ,m_inputFilePath(inputFilePath)
85
+ ,m_inputFile(new InputFileStream(inputFilePath))
86
+ ,m_nBestStream(NULL)
87
+ ,m_outputWordGraphStream(NULL)
88
+ ,m_outputSearchGraphStream(NULL)
89
+ ,m_detailedTranslationReportingStream(NULL)
90
+ ,m_alignmentOutputStream(NULL)
91
+ {
92
+ Initialization(inputFactorOrder, outputFactorOrder
93
+ , inputFactorUsed
94
+ , nBestSize, nBestFilePath);
95
+
96
+ m_inputStream = m_inputFile;
97
+ }
98
+
99
+ IOWrapper::~IOWrapper()
100
+ {
101
+ if (m_inputFile != NULL)
102
+ delete m_inputFile;
103
+ if (m_nBestStream != NULL && !m_surpressSingleBestOutput) {
104
+ // outputting n-best to file, rather than stdout. need to close file and delete obj
105
+ delete m_nBestStream;
106
+ }
107
+ if (m_outputWordGraphStream != NULL) {
108
+ delete m_outputWordGraphStream;
109
+ }
110
+ if (m_outputSearchGraphStream != NULL) {
111
+ delete m_outputSearchGraphStream;
112
+ }
113
+ delete m_detailedTranslationReportingStream;
114
+ delete m_alignmentOutputStream;
115
+ }
116
+
117
+ void IOWrapper::Initialization(const std::vector<FactorType> &/*inputFactorOrder*/
118
+ , const std::vector<FactorType> &/*outputFactorOrder*/
119
+ , const FactorMask &/*inputFactorUsed*/
120
+ , size_t nBestSize
121
+ , const std::string &nBestFilePath)
122
+ {
123
+ const StaticData &staticData = StaticData::Instance();
124
+
125
+ // n-best
126
+ m_surpressSingleBestOutput = false;
127
+
128
+ if (nBestSize > 0) {
129
+ if (nBestFilePath == "-" || nBestFilePath == "/dev/stdout") {
130
+ m_nBestStream = &std::cout;
131
+ m_surpressSingleBestOutput = true;
132
+ } else {
133
+ std::ofstream *file = new std::ofstream;
134
+ m_nBestStream = file;
135
+ file->open(nBestFilePath.c_str());
136
+ }
137
+ }
138
+
139
+ // wordgraph output
140
+ if (staticData.GetOutputWordGraph()) {
141
+ string fileName = staticData.GetParam("output-word-graph")[0];
142
+ std::ofstream *file = new std::ofstream;
143
+ m_outputWordGraphStream = file;
144
+ file->open(fileName.c_str());
145
+ }
146
+
147
+
148
+ // search graph output
149
+ if (staticData.GetOutputSearchGraph()) {
150
+ string fileName;
151
+ if (staticData.GetOutputSearchGraphExtended())
152
+ fileName = staticData.GetParam("output-search-graph-extended")[0];
153
+ else
154
+ fileName = staticData.GetParam("output-search-graph")[0];
155
+ std::ofstream *file = new std::ofstream;
156
+ m_outputSearchGraphStream = file;
157
+ file->open(fileName.c_str());
158
+ }
159
+
160
+ // detailed translation reporting
161
+ if (staticData.IsDetailedTranslationReportingEnabled()) {
162
+ const std::string &path = staticData.GetDetailedTranslationReportingFilePath();
163
+ m_detailedTranslationReportingStream = new std::ofstream(path.c_str());
164
+ CHECK(m_detailedTranslationReportingStream->good());
165
+ }
166
+
167
+ // sentence alignment output
168
+ if (! staticData.GetAlignmentOutputFile().empty()) {
169
+ m_alignmentOutputStream = new ofstream(staticData.GetAlignmentOutputFile().c_str());
170
+ CHECK(m_alignmentOutputStream->good());
171
+ }
172
+
173
+ }
174
+
175
+ InputType*IOWrapper::GetInput(InputType* inputType)
176
+ {
177
+ if(inputType->Read(*m_inputStream, m_inputFactorOrder)) {
178
+ if (long x = inputType->GetTranslationId()) {
179
+ if (x>=m_translationId) m_translationId = x+1;
180
+ } else inputType->SetTranslationId(m_translationId++);
181
+
182
+ return inputType;
183
+ } else {
184
+ delete inputType;
185
+ return NULL;
186
+ }
187
+ }
188
+
189
+ /***
190
+ * print surface factor only for the given phrase
191
+ */
192
+ void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder,
193
+ bool reportSegmentation, bool reportAllFactors)
194
+ {
195
+ CHECK(outputFactorOrder.size() > 0);
196
+ const Phrase& phrase = edge.GetCurrTargetPhrase();
197
+ if (reportAllFactors == true) {
198
+ out << phrase;
199
+ } else {
200
+ size_t size = phrase.GetSize();
201
+ for (size_t pos = 0 ; pos < size ; pos++) {
202
+ const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
203
+ out << *factor;
204
+ CHECK(factor);
205
+
206
+ for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
207
+ const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
208
+ CHECK(factor);
209
+
210
+ out << "|" << *factor;
211
+ }
212
+ out << " ";
213
+ }
214
+ }
215
+
216
+ // trace option "-t"
217
+ if (reportSegmentation == true && phrase.GetSize() > 0) {
218
+ out << "|" << edge.GetCurrSourceWordsRange().GetStartPos()
219
+ << "-" << edge.GetCurrSourceWordsRange().GetEndPos() << "| ";
220
+ }
221
+ }
222
+
223
+ void OutputBestSurface(std::ostream &out, const Hypothesis *hypo, const std::vector<FactorType> &outputFactorOrder,
224
+ bool reportSegmentation, bool reportAllFactors)
225
+ {
226
+ if (hypo != NULL) {
227
+ // recursively retrace this best path through the lattice, starting from the end of the hypothesis sentence
228
+ OutputBestSurface(out, hypo->GetPrevHypo(), outputFactorOrder, reportSegmentation, reportAllFactors);
229
+ OutputSurface(out, *hypo, outputFactorOrder, reportSegmentation, reportAllFactors);
230
+ }
231
+ }
232
+
233
+ void OutputAlignment(ostream &out, const AlignmentInfo &ai, size_t sourceOffset, size_t targetOffset)
234
+ {
235
+ typedef std::vector< const std::pair<size_t,size_t>* > AlignVec;
236
+ AlignVec alignments = ai.GetSortedAlignments();
237
+
238
+ AlignVec::const_iterator it;
239
+ for (it = alignments.begin(); it != alignments.end(); ++it) {
240
+ const std::pair<size_t,size_t> &alignment = **it;
241
+ out << alignment.first + sourceOffset << "-" << alignment.second + targetOffset << " ";
242
+ }
243
+
244
+ }
245
+
246
+ void OutputAlignment(ostream &out, const vector<const Hypothesis *> &edges)
247
+ {
248
+ size_t targetOffset = 0;
249
+
250
+ for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
251
+ const Hypothesis &edge = *edges[currEdge];
252
+ const TargetPhrase &tp = edge.GetCurrTargetPhrase();
253
+ size_t sourceOffset = edge.GetCurrSourceWordsRange().GetStartPos();
254
+
255
+ OutputAlignment(out, tp.GetAlignmentInfo(), sourceOffset, targetOffset);
256
+
257
+ targetOffset += tp.GetSize();
258
+ }
259
+ out << std::endl;
260
+ }
261
+
262
+ void OutputAlignment(OutputCollector* collector, size_t lineNo , const vector<const Hypothesis *> &edges)
263
+ {
264
+ ostringstream out;
265
+ OutputAlignment(out, edges);
266
+
267
+ collector->Write(lineNo,out.str());
268
+ }
269
+
270
+ void OutputAlignment(OutputCollector* collector, size_t lineNo , const Hypothesis *hypo)
271
+ {
272
+ if (collector) {
273
+ std::vector<const Hypothesis *> edges;
274
+ const Hypothesis *currentHypo = hypo;
275
+ while (currentHypo) {
276
+ edges.push_back(currentHypo);
277
+ currentHypo = currentHypo->GetPrevHypo();
278
+ }
279
+
280
+ OutputAlignment(collector,lineNo, edges);
281
+ }
282
+ }
283
+
284
+ void OutputAlignment(OutputCollector* collector, size_t lineNo , const TrellisPath &path)
285
+ {
286
+ if (collector) {
287
+ OutputAlignment(collector,lineNo, path.GetEdges());
288
+ }
289
+ }
290
+
291
+ void OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/, bool reportSegmentation, bool reportAllFactors, std::ostream &out)
292
+ {
293
+ const std::vector<const Hypothesis *> &edges = path.GetEdges();
294
+
295
+ for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
296
+ const Hypothesis &edge = *edges[currEdge];
297
+ OutputSurface(out, edge, StaticData::Instance().GetOutputFactorOrder(), reportSegmentation, reportAllFactors);
298
+ }
299
+ out << endl;
300
+ }
301
+
302
+ void IOWrapper::Backtrack(const Hypothesis *hypo)
303
+ {
304
+
305
+ if (hypo->GetPrevHypo() != NULL) {
306
+ VERBOSE(3,hypo->GetId() << " <= ");
307
+ Backtrack(hypo->GetPrevHypo());
308
+ }
309
+ }
310
+
311
+ void OutputBestHypo(const std::vector<Word>& mbrBestHypo, long /*translationId*/, bool /*reportSegmentation*/, bool /*reportAllFactors*/, ostream& out)
312
+ {
313
+
314
+ for (size_t i = 0 ; i < mbrBestHypo.size() ; i++) {
315
+ const Factor *factor = mbrBestHypo[i].GetFactor(StaticData::Instance().GetOutputFactorOrder()[0]);
316
+ CHECK(factor);
317
+ if (i>0) out << " " << *factor;
318
+ else out << *factor;
319
+ }
320
+ out << endl;
321
+ }
322
+
323
+
324
+ void OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo)
325
+ {
326
+ if (hypo->GetPrevHypo()) {
327
+ OutputInput(map, hypo->GetPrevHypo());
328
+ map[hypo->GetCurrSourceWordsRange().GetStartPos()] = hypo->GetSourcePhrase();
329
+ }
330
+ }
331
+
332
+ void OutputInput(std::ostream& os, const Hypothesis* hypo)
333
+ {
334
+ size_t len = hypo->GetInput().GetSize();
335
+ std::vector<const Phrase*> inp_phrases(len, 0);
336
+ OutputInput(inp_phrases, hypo);
337
+ for (size_t i=0; i<len; ++i)
338
+ if (inp_phrases[i]) os << *inp_phrases[i];
339
+ }
340
+
341
+ void IOWrapper::OutputBestHypo(const Hypothesis *hypo, long /*translationId*/, bool reportSegmentation, bool reportAllFactors)
342
+ {
343
+ if (hypo != NULL) {
344
+ VERBOSE(1,"BEST TRANSLATION: " << *hypo << endl);
345
+ VERBOSE(3,"Best path: ");
346
+ Backtrack(hypo);
347
+ VERBOSE(3,"0" << std::endl);
348
+ if (!m_surpressSingleBestOutput) {
349
+ if (StaticData::Instance().IsPathRecoveryEnabled()) {
350
+ OutputInput(cout, hypo);
351
+ cout << "||| ";
352
+ }
353
+ OutputBestSurface(cout, hypo, m_outputFactorOrder, reportSegmentation, reportAllFactors);
354
+ cout << endl;
355
+ }
356
+ } else {
357
+ VERBOSE(1, "NO BEST TRANSLATION" << endl);
358
+ if (!m_surpressSingleBestOutput) {
359
+ cout << endl;
360
+ }
361
+ }
362
+ }
363
+
364
+ void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, const std::vector<Moses::FactorType>& outputFactorOrder, const TranslationSystem* system, long translationId, bool reportSegmentation)
365
+ {
366
+ const StaticData &staticData = StaticData::Instance();
367
+ bool labeledOutput = staticData.IsLabeledNBestList();
368
+ bool reportAllFactors = staticData.GetReportAllFactorsNBest();
369
+ bool includeAlignment = staticData.NBestIncludesAlignment();
370
+ bool includeWordAlignment = staticData.PrintAlignmentInfoInNbest();
371
+
372
+ TrellisPathList::const_iterator iter;
373
+ for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
374
+ const TrellisPath &path = **iter;
375
+ const std::vector<const Hypothesis *> &edges = path.GetEdges();
376
+
377
+ // print the surface factor of the translation
378
+ out << translationId << " ||| ";
379
+ for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
380
+ const Hypothesis &edge = *edges[currEdge];
381
+ OutputSurface(out, edge, outputFactorOrder, reportSegmentation, reportAllFactors);
382
+ }
383
+ out << " |||";
384
+
385
+ std::string lastName = "";
386
+ const vector<const StatefulFeatureFunction*>& sff = system->GetStatefulFeatureFunctions();
387
+ for( size_t i=0; i<sff.size(); i++ ) {
388
+ if( labeledOutput && lastName != sff[i]->GetScoreProducerWeightShortName() ) {
389
+ lastName = sff[i]->GetScoreProducerWeightShortName();
390
+ out << " " << lastName << ":";
391
+ }
392
+ vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( sff[i] );
393
+ for (size_t j = 0; j<scores.size(); ++j) {
394
+ out << " " << scores[j];
395
+ }
396
+ }
397
+
398
+ const vector<const StatelessFeatureFunction*>& slf = system->GetStatelessFeatureFunctions();
399
+ for( size_t i=0; i<slf.size(); i++ ) {
400
+ if( labeledOutput && lastName != slf[i]->GetScoreProducerWeightShortName() ) {
401
+ lastName = slf[i]->GetScoreProducerWeightShortName();
402
+ out << " " << lastName << ":";
403
+ }
404
+ vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( slf[i] );
405
+ for (size_t j = 0; j<scores.size(); ++j) {
406
+ out << " " << scores[j];
407
+ }
408
+ }
409
+
410
+ // translation components
411
+ const vector<PhraseDictionaryFeature*>& pds = system->GetPhraseDictionaries();
412
+ if (pds.size() > 0) {
413
+
414
+ for( size_t i=0; i<pds.size(); i++ ) {
415
+ size_t pd_numinputscore = pds[i]->GetNumInputScores();
416
+ vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( pds[i] );
417
+ for (size_t j = 0; j<scores.size(); ++j){
418
+
419
+ if (labeledOutput && (i == 0) ){
420
+ if ((j == 0) || (j == pd_numinputscore)){
421
+ lastName = pds[i]->GetScoreProducerWeightShortName(j);
422
+ out << " " << lastName << ":";
423
+ }
424
+ }
425
+ out << " " << scores[j];
426
+ }
427
+ }
428
+ }
429
+
430
+ // generation
431
+ const vector<GenerationDictionary*>& gds = system->GetGenerationDictionaries();
432
+ if (gds.size() > 0) {
433
+
434
+ for( size_t i=0; i<gds.size(); i++ ) {
435
+ size_t pd_numinputscore = gds[i]->GetNumInputScores();
436
+ vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( gds[i] );
437
+ for (size_t j = 0; j<scores.size(); ++j){
438
+
439
+ if (labeledOutput && (i == 0) ){
440
+ if ((j == 0) || (j == pd_numinputscore)){
441
+ lastName = gds[i]->GetScoreProducerWeightShortName(j);
442
+ out << " " << lastName << ":";
443
+ }
444
+ }
445
+ out << " " << scores[j];
446
+ }
447
+ }
448
+ }
449
+
450
+ // total
451
+ out << " ||| " << path.GetTotalScore();
452
+
453
+ //phrase-to-phrase alignment
454
+ if (includeAlignment) {
455
+ out << " |||";
456
+ for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--) {
457
+ const Hypothesis &edge = *edges[currEdge];
458
+ const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
459
+ WordsRange targetRange = path.GetTargetWordsRange(edge);
460
+ out << " " << sourceRange.GetStartPos();
461
+ if (sourceRange.GetStartPos() < sourceRange.GetEndPos()) {
462
+ out << "-" << sourceRange.GetEndPos();
463
+ }
464
+ out<< "=" << targetRange.GetStartPos();
465
+ if (targetRange.GetStartPos() < targetRange.GetEndPos()) {
466
+ out<< "-" << targetRange.GetEndPos();
467
+ }
468
+ }
469
+ }
470
+
471
+ if (includeWordAlignment) {
472
+ out << " ||| ";
473
+ for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--) {
474
+ const Hypothesis &edge = *edges[currEdge];
475
+ const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
476
+ WordsRange targetRange = path.GetTargetWordsRange(edge);
477
+ const int sourceOffset = sourceRange.GetStartPos();
478
+ const int targetOffset = targetRange.GetStartPos();
479
+ const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignmentInfo();
480
+
481
+ OutputAlignment(out, ai, sourceOffset, targetOffset);
482
+
483
+ }
484
+ }
485
+
486
+ if (StaticData::Instance().IsPathRecoveryEnabled()) {
487
+ out << "|||";
488
+ OutputInput(out, edges[0]);
489
+ }
490
+
491
+ out << endl;
492
+ }
493
+
494
+
495
+ out <<std::flush;
496
+ }
497
+
498
+ void OutputLatticeMBRNBest(std::ostream& out, const vector<LatticeMBRSolution>& solutions,long translationId)
499
+ {
500
+ for (vector<LatticeMBRSolution>::const_iterator si = solutions.begin(); si != solutions.end(); ++si) {
501
+ out << translationId;
502
+ out << " |||";
503
+ const vector<Word> mbrHypo = si->GetWords();
504
+ for (size_t i = 0 ; i < mbrHypo.size() ; i++) {
505
+ const Factor *factor = mbrHypo[i].GetFactor(StaticData::Instance().GetOutputFactorOrder()[0]);
506
+ if (i>0) out << " " << *factor;
507
+ else out << *factor;
508
+ }
509
+ out << " |||";
510
+ out << " map: " << si->GetMapScore();
511
+ out << " w: " << mbrHypo.size();
512
+ const vector<float>& ngramScores = si->GetNgramScores();
513
+ for (size_t i = 0; i < ngramScores.size(); ++i) {
514
+ out << " " << ngramScores[i];
515
+ }
516
+ out << " ||| " << si->GetScore();
517
+
518
+ out << endl;
519
+ }
520
+ }
521
+
522
+
523
+ void IOWrapper::OutputLatticeMBRNBestList(const vector<LatticeMBRSolution>& solutions,long translationId)
524
+ {
525
+ OutputLatticeMBRNBest(*m_nBestStream, solutions,translationId);
526
+ }
527
+
528
+ bool ReadInput(IOWrapper &ioWrapper, InputTypeEnum inputType, InputType*& source)
529
+ {
530
+ delete source;
531
+ switch(inputType) {
532
+ case SentenceInput:
533
+ source = ioWrapper.GetInput(new Sentence);
534
+ break;
535
+ case ConfusionNetworkInput:
536
+ source = ioWrapper.GetInput(new ConfusionNet);
537
+ break;
538
+ case WordLatticeInput:
539
+ source = ioWrapper.GetInput(new WordLattice);
540
+ break;
541
+ default:
542
+ TRACE_ERR("Unknown input type: " << inputType << "\n");
543
+ }
544
+ return (source ? true : false);
545
+ }
546
+
547
+
548
+
549
+ IOWrapper *GetIOWrapper(const StaticData &staticData)
550
+ {
551
+ IOWrapper *ioWrapper;
552
+ const std::vector<FactorType> &inputFactorOrder = staticData.GetInputFactorOrder()
553
+ ,&outputFactorOrder = staticData.GetOutputFactorOrder();
554
+ FactorMask inputFactorUsed(inputFactorOrder);
555
+
556
+ // io
557
+ if (staticData.GetParam("input-file").size() == 1) {
558
+ VERBOSE(2,"IO from File" << endl);
559
+ string filePath = staticData.GetParam("input-file")[0];
560
+
561
+ ioWrapper = new IOWrapper(inputFactorOrder, outputFactorOrder, inputFactorUsed
562
+ , staticData.GetNBestSize()
563
+ , staticData.GetNBestFilePath()
564
+ , filePath);
565
+ } else {
566
+ VERBOSE(1,"IO from STDOUT/STDIN" << endl);
567
+ ioWrapper = new IOWrapper(inputFactorOrder, outputFactorOrder, inputFactorUsed
568
+ , staticData.GetNBestSize()
569
+ , staticData.GetNBestFilePath());
570
+ }
571
+ ioWrapper->ResetTranslationId();
572
+
573
+ IFVERBOSE(1)
574
+ PrintUserTime("Created input-output object");
575
+
576
+ return ioWrapper;
577
+ }
578
+
579
+ }
580
+
mosesdecoder/contrib/relent-filter/src/Jamfile ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ alias deps : ../../../moses/src//moses ;
2
+
3
+ exe calcDivergence : Main.cpp mbr.cpp IOWrapper.cpp TranslationAnalysis.cpp LatticeMBR.cpp RelativeEntropyCalc.cpp deps ;
4
+
5
+ alias programs : calcDivergence ;
6
+
mosesdecoder/contrib/relent-filter/src/Main.h ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*********************************************************************
2
+ Relative Entropy-based Phrase table Pruning
3
+ Copyright (C) 2012 Wang Ling
4
+ All rights reserved.
5
+
6
+ Redistribution and use in source and binary forms, with or without modification,
7
+ are permitted provided that the following conditions are met:
8
+
9
+ * Redistributions of source code must retain the above copyright notice,
10
+ this list of conditions and the following disclaimer.
11
+ * Redistributions in binary form must reproduce the above copyright notice,
12
+ this list of conditions and the following disclaimer in the documentation
13
+ and/or other materials provided with the distribution.
14
+ * Neither the name of the University of Edinburgh nor the names of its contributors
15
+ may be used to endorse or promote products derived from this software
16
+ without specific prior written permission.
17
+
18
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
20
+ THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21
+ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
22
+ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
26
+ IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28
+ POSSIBILITY OF SUCH DAMAGE.
29
+ ***********************************************************************/
30
+
31
+ #ifndef moses_cmd_Main_h
32
+ #define moses_cmd_Main_h
33
+
34
+ #include "StaticData.h"
35
+
36
+ class IOWrapper;
37
+
38
+ int main(int argc, char* argv[]);
39
+ #endif
mosesdecoder/contrib/relent-filter/src/TranslationAnalysis.cpp ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+
3
+ #include <iostream>
4
+ #include <sstream>
5
+ #include <algorithm>
6
+ #include "StaticData.h"
7
+ #include "Hypothesis.h"
8
+ #include "TranslationAnalysis.h"
9
+
10
+ using namespace Moses;
11
+
12
+ namespace TranslationAnalysis
13
+ {
14
+
15
+ void PrintTranslationAnalysis(const TranslationSystem* system, std::ostream &os, const Hypothesis* hypo)
16
+ {
17
+ os << std::endl << "TRANSLATION HYPOTHESIS DETAILS:" << std::endl;
18
+ std::vector<const Hypothesis*> translationPath;
19
+
20
+ while (hypo) {
21
+ translationPath.push_back(hypo);
22
+ hypo = hypo->GetPrevHypo();
23
+ }
24
+
25
+ std::reverse(translationPath.begin(), translationPath.end());
26
+ std::vector<std::string> droppedWords;
27
+ std::vector<const Hypothesis*>::iterator tpi = translationPath.begin();
28
+ if(tpi == translationPath.end())
29
+ return;
30
+ ++tpi; // skip initial translation state
31
+ std::vector<std::string> sourceMap;
32
+ std::vector<std::string> targetMap;
33
+ std::vector<unsigned int> lmAcc(0);
34
+ size_t lmCalls = 0;
35
+ bool doLMStats = ((*tpi)->GetLMStats() != 0);
36
+ if (doLMStats)
37
+ lmAcc.resize((*tpi)->GetLMStats()->size(), 0);
38
+ for (; tpi != translationPath.end(); ++tpi) {
39
+ std::ostringstream sms;
40
+ std::ostringstream tms;
41
+ std::string target = (*tpi)->GetTargetPhraseStringRep();
42
+ std::string source = (*tpi)->GetSourcePhraseStringRep();
43
+ WordsRange twr = (*tpi)->GetCurrTargetWordsRange();
44
+ WordsRange swr = (*tpi)->GetCurrSourceWordsRange();
45
+ const AlignmentInfo &alignmentInfo = (*tpi)->GetCurrTargetPhrase().GetAlignmentInfo();
46
+ // language model backoff stats,
47
+ if (doLMStats) {
48
+ std::vector<std::vector<unsigned int> >& lmstats = *(*tpi)->GetLMStats();
49
+ std::vector<std::vector<unsigned int> >::iterator i = lmstats.begin();
50
+ std::vector<unsigned int>::iterator acc = lmAcc.begin();
51
+
52
+ for (; i != lmstats.end(); ++i, ++acc) {
53
+ std::vector<unsigned int>::iterator j = i->begin();
54
+ lmCalls += i->size();
55
+ for (; j != i->end(); ++j) {
56
+ (*acc) += *j;
57
+ }
58
+ }
59
+ }
60
+
61
+ bool epsilon = false;
62
+ if (target == "") {
63
+ target="<EPSILON>";
64
+ epsilon = true;
65
+ droppedWords.push_back(source);
66
+ }
67
+ os << " SOURCE: " << swr << " " << source << std::endl
68
+ << " TRANSLATED AS: " << target << std::endl
69
+ << " WORD ALIGNED: " << alignmentInfo << std::endl;
70
+ size_t twr_i = twr.GetStartPos();
71
+ size_t swr_i = swr.GetStartPos();
72
+ if (!epsilon) {
73
+ sms << twr_i;
74
+ }
75
+ if (epsilon) {
76
+ tms << "del(" << swr_i << ")";
77
+ } else {
78
+ tms << swr_i;
79
+ }
80
+ swr_i++;
81
+ twr_i++;
82
+ for (; twr_i <= twr.GetEndPos() && twr.GetEndPos() != NOT_FOUND; twr_i++) {
83
+ sms << '-' << twr_i;
84
+ }
85
+ for (; swr_i <= swr.GetEndPos() && swr.GetEndPos() != NOT_FOUND; swr_i++) {
86
+ tms << '-' << swr_i;
87
+ }
88
+ if (!epsilon) targetMap.push_back(sms.str());
89
+ sourceMap.push_back(tms.str());
90
+ }
91
+ std::vector<std::string>::iterator si = sourceMap.begin();
92
+ std::vector<std::string>::iterator ti = targetMap.begin();
93
+ os << std::endl << "SOURCE/TARGET SPANS:";
94
+ os << std::endl << " SOURCE:";
95
+ for (; si != sourceMap.end(); ++si) {
96
+ os << " " << *si;
97
+ }
98
+ os << std::endl << " TARGET:";
99
+ for (; ti != targetMap.end(); ++ti) {
100
+ os << " " << *ti;
101
+ }
102
+ os << std::endl << std::endl;
103
+ if (doLMStats && lmCalls > 0) {
104
+ std::vector<unsigned int>::iterator acc = lmAcc.begin();
105
+ const LMList& lmlist = system->GetLanguageModels();
106
+ LMList::const_iterator i = lmlist.begin();
107
+ for (; acc != lmAcc.end(); ++acc, ++i) {
108
+ char buf[256];
109
+ sprintf(buf, "%.4f", (float)(*acc)/(float)lmCalls);
110
+ os << (*i)->GetScoreProducerDescription() <<", AVG N-GRAM LENGTH: " << buf << std::endl;
111
+ }
112
+ }
113
+
114
+ if (droppedWords.size() > 0) {
115
+ std::vector<std::string>::iterator dwi = droppedWords.begin();
116
+ os << std::endl << "WORDS/PHRASES DROPPED:" << std::endl;
117
+ for (; dwi != droppedWords.end(); ++dwi) {
118
+ os << "\tdropped=" << *dwi << std::endl;
119
+ }
120
+ }
121
+ os << std::endl << "SCORES (UNWEIGHTED/WEIGHTED): ";
122
+ StaticData::Instance().GetScoreIndexManager().PrintLabeledWeightedScores(os, translationPath.back()->GetScoreBreakdown(), StaticData::Instance().GetAllWeights());
123
+ os << std::endl;
124
+ }
125
+
126
+ }
mosesdecoder/contrib/relent-filter/src/mbr.h ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // $Id$
2
+
3
+ /***********************************************************************
4
+ Moses - factored phrase-based language decoder
5
+ Copyright (C) 2006 University of Edinburgh
6
+
7
+ This library is free software; you can redistribute it and/or
8
+ modify it under the terms of the GNU Lesser General Public
9
+ License as published by the Free Software Foundation; either
10
+ version 2.1 of the License, or (at your option) any later version.
11
+
12
+ This library is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
+ Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public
18
+ License along with this library; if not, write to the Free Software
19
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ ***********************************************************************/
21
+
22
+ #ifndef moses_cmd_mbr_h
23
+ #define moses_cmd_mbr_h
24
+
25
+ const Moses::TrellisPath doMBR(const Moses::TrellisPathList& nBestList);
26
+ void GetOutputFactors(const Moses::TrellisPath &path, std::vector <const Moses::Factor*> &translation);
27
+ float calculate_score(const std::vector< std::vector<const Moses::Factor*> > & sents, int ref, int hyp, std::vector < std::map < std::vector < const Moses::Factor *>, int > > & ngram_stats );
28
+ #endif
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-archetypeset.h ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ///////////////////////////////////////////////////////////////////////////////
2
+ // //
3
+ // This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
4
+ // //
5
+ // ModelBlocks is free software: you can redistribute it and/or modify //
6
+ // it under the terms of the GNU General Public License as published by //
7
+ // the Free Software Foundation, either version 3 of the License, or //
8
+ // (at your option) any later version. //
9
+ // //
10
+ // ModelBlocks is distributed in the hope that it will be useful, //
11
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of //
12
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
13
+ // GNU General Public License for more details. //
14
+ // //
15
+ // You should have received a copy of the GNU General Public License //
16
+ // along with ModelBlocks. If not, see <http://www.gnu.org/licenses/>. //
17
+ // //
18
+ // ModelBlocks developers designate this particular file as subject to //
19
+ // the "Moses" exception as provided by ModelBlocks developers in //
20
+ // the LICENSE file that accompanies this code. //
21
+ // //
22
+ ///////////////////////////////////////////////////////////////////////////////
23
+
24
+ #ifndef _NL_ARCHETYPESET_
25
+ #define _NL_ARCHETYPESET_
26
+
27
+ #include <boost/thread/thread.hpp>
28
+ #include <boost/thread/mutex.hpp>
29
+ #include <boost/bind.hpp>
30
+ #include "nl-minheap.h"
31
+
32
+ ////////////////////////////////////////////////////////////////////////////////
33
+
34
// Adds a score of type S to any copy-constructible type T.
// The score is publicly accessible as `scr`, writable through setScore()
// and readable through getScore().
template <class S, class T>
class Scored : public T {
 public:
  S scr;
  // Constructors are declared with the plain class name: a template-id
  // (Scored<S,T>) as constructor name is no longer accepted from C++20.
  Scored ( )           : T() , scr()  { }   // value-initialised base and score
  Scored ( S s )       : T() , scr(s) { }   // value-initialised base, given score
  Scored ( S s, T& t ) : T(t), scr(s) { }   // copy of t, given score
  S& setScore()       { return scr; }       // writable reference to the score
  S  getScore() const { return scr; }       // copy of the score
};
44
+
45
+ ////////////////////////////////////////////////////////////////////////////////
46
+
47
+ template<char* SD1,class I,char* SD2,class T,char* SD3>
48
+ class Numbered : public T {
49
+ private:
50
+ // Data members...
51
+ I i;
52
+ public:
53
+ // Constructor / destructor methods...
54
+ Numbered<SD1,I,SD2,T,SD3> ( ) { }
55
+ Numbered<SD1,I,SD2,T,SD3> ( char* ps ) { ps>>*this>>"\0"; }
56
+ Numbered<SD1,I,SD2,T,SD3> ( const I& iA, const T& tA ) : T(tA), i(iA) { }
57
+ // Specification methods...
58
+ I& setNumber ( ) { return i; }
59
+ T& setT ( ) { return *this; }
60
+ // Extraction methods...
61
+ const I& getNumber ( ) const { return i; }
62
+ const T& getT ( ) const { return *this; }
63
+ // Input / output methods...
64
+ friend ostream& operator<< ( ostream& os, const Numbered<SD1,I,SD2,T,SD3>& rv ) { return os<<SD1<<rv.i<<SD2<<rv.getT()<<SD3; }
65
+ friend String& operator<< ( String& str, const Numbered<SD1,I,SD2,T,SD3>& rv ) { return str<<SD1<<rv.i<<SD2<<rv.getT()<<SD3; }
66
+ friend pair<StringInput,Numbered<SD1,I,SD2,T,SD3>*> operator>> ( StringInput ps, Numbered<SD1,I,SD2,T,SD3>& rv ) { return pair<StringInput,Numbered<SD1,I,SD2,T,SD3>*>(ps,&rv); }
67
+ friend StringInput operator>> ( pair<StringInput,Numbered<SD1,I,SD2,T,SD3>*> delimbuff, const char* psPostDelim ) {
68
+ return ( (SD3[0]=='\0') ? delimbuff.first>>SD1>>delimbuff.second->i>>SD2>>delimbuff.second->setT()>>psPostDelim
69
+ : delimbuff.first>>SD1>>delimbuff.second->i>>SD2>>delimbuff.second->setT()>>SD3>>psPostDelim );
70
+ }
71
+ };
72
+
73
+ ////////////////////////////////////////////////////////////////////////////////
74
+
75
+ template<class V>
76
+ class ArchetypeSet : public multimap<typename V::ElementType,Numbered<psX,int,psBar,V,psX> > {
77
+ private:
78
+ // Static data members...
79
+ static const int FIRST_INDEX_TO_CHECK = 0;
80
+ typedef Numbered<psX,int,psBar,V,psX> NV;
81
+ typedef multimap<typename V::ElementType,NV> MapType;
82
+ // Data members...
83
+ MinHeap<Scored<typename V::ElementType,pair<int,SafePtr<const NV> > > > hsivCalc;
84
+ public:
85
+ ArchetypeSet<V> ( ) { }
86
+ ArchetypeSet<V> ( const ArchetypeSet<V>& aa ) : MapType(aa) { cerr<<"\nCOPY!!!!\n\n"; }
87
+ ArchetypeSet<V>& operator= ( const ArchetypeSet<V>& aa ) { cerr<<"\nCOPY2!!!!\n\n"; MapType::operator=(aa); return *this; }
88
+ // Specification methods...
89
+ void add ( const V& );
90
+ // Extraction methods...
91
+ bool isEmpty ( ) const { return MapType::empty(); }
92
+ pair<typename V::ElementType,int> getDistanceOfNearest ( const V& ) const;
93
+ void dump ( ) { for(typename MapType::const_iterator ii=MapType::begin(); ii!=MapType::end(); ii++) cerr<<ii->second<<"\n"; }
94
+ };
95
+
96
+ ////////////////////
97
+ template<class V>
98
+ void ArchetypeSet<V>::add ( const V& v ) {
99
+ //cerr<<"adding "<<v.get(FIRST_INDEX_TO_CHECK)<<" "<<MapType::size()<<" "<<v<<"\n";
100
+ MapType::insert ( pair<typename V::ElementType,NV>(v.get(FIRST_INDEX_TO_CHECK),NV(MapType::size()+1,v) ) );
101
+ ////cerr<<"adding "<<v.second.get(1)<<" ln"<<MapType::lower_bound(v.second.get(1))->second.lineNum.toInt()<<"\n";
102
+ }
103
+
104
+ ////////////////////
105
+ template<class V>
106
+ pair<typename V::ElementType,int> ArchetypeSet<V>::getDistanceOfNearest ( const V& v ) const {
107
+ //const Scored<typename V::ElementType,pair<int,SafePtr<const V> > > sipvDummy ( DBL_MAX );
108
+ //MinHeap<Scored<typename V::ElementType,pair<int,SafePtr<const V> > > > hsiv ( MapType::size()+1, sipvDummy );
109
+ MinHeap<Scored<typename V::ElementType,pair<int,SafePtr<const NV> > > >& hsiv =
110
+ const_cast<MinHeap<Scored<typename V::ElementType,pair<int,SafePtr<const NV> > > >&> ( hsivCalc );
111
+ hsiv.clear();
112
+
113
+ typename MapType::const_iterator iUpper = MapType::upper_bound(v.get(FIRST_INDEX_TO_CHECK));
114
+ typename MapType::const_iterator iLower = iUpper; if(iLower!=MapType::begin())iLower--;
115
+ ////cerr<<"seeking "<<v.get(0)<<" (upper=ln"<<(&iUpper->second)<<" "<<((iUpper!=MapType::end())?iUpper->first:-1)<<", lower=ln"<<&iLower->second<<" "<<iLower->first<<")\n";
116
+ int iNext = 0;
117
+ if ( iUpper!=MapType::end() ) {
118
+ hsiv.set(iNext).first = FIRST_INDEX_TO_CHECK;
119
+ hsiv.set(iNext).second = SafePtr<const NV> ( iUpper->second );
120
+ typename V::ElementType d = v.getMarginalDistance ( hsiv.get(iNext).first, hsiv.get(iNext).second.getRef() );
121
+ hsiv.set(iNext).setScore() = d;
122
+ //hsiv.set(iNext).setScore() = v.getMarginalDistance ( hsiv.getMin().first, iUpper->second.second );
123
+ ////int j =
124
+ hsiv.fixDecr(iNext);
125
+ ////cerr<<" adding ln"<<&hsiv.get(j).second.getRef()<<" marg-dist="<<d<<" new-score="<<double(hsiv.get(j).getScore())<<" new-pos="<<j<<"\n";
126
+ iNext++;
127
+ ////for(int i=0;i<iNext;i++) cerr<<" "<<i<<": ln"<<hsiv.get(i).second.getRef().lineNum.toInt()<<" new-score="<<double(hsiv.get(i).getScore())<<"\n";
128
+ }
129
+ hsiv.set(iNext).first = FIRST_INDEX_TO_CHECK;
130
+ hsiv.set(iNext).second = SafePtr<const NV> ( iLower->second );
131
+ typename V::ElementType d = v.getMarginalDistance ( hsiv.get(iNext).first, hsiv.get(iNext).second.getRef() );
132
+ hsiv.set(iNext).setScore() = d;
133
+ //hsiv.set(iNext).setScore() = v.getMarginalDistance ( hsiv.getMin().first, iLower->second.second );
134
+ ////int j =
135
+ hsiv.fixDecr(iNext);
136
+ ////cerr<<" adding ln"<<&hsiv.get(j).second.getRef()<<" marg-dist="<<d<<" new-score="<<double(hsiv.get(j).getScore())<<" new-pos="<<j<<"\n";
137
+ iNext++;
138
+ ////for(int i=0;i<iNext;i++) cerr<<" "<<i<<": ln"<<hsiv.get(i).second.getRef().lineNum.toInt()<<" new-score="<<double(hsiv.get(i).getScore())<<"\n";
139
+ while ( hsiv.getMin().first < V::SIZE-1 ) {
140
+ typename V::ElementType d = v.getMarginalDistance ( ++hsiv.setMin().first, hsiv.getMin().second.getRef() );
141
+ hsiv.setMin().setScore() += d;
142
+ ////cerr<<" matching ln"<<&hsiv.getMin().second.getRef()<<" i="<<hsiv.setMin().first<<" marg-dist="<<d<<" new-score="<<hsiv.getMin().getScore();
143
+ ////int j =
144
+ hsiv.fixIncr(0);
145
+ ////cerr<<" new-pos="<<j<<"\n";
146
+ ////if(j!=0) for(int i=0;i<iNext;i++) cerr<<" "<<i<<": ln"<<hsiv.get(i).second.getRef().lineNum.toInt()<<" new-score="<<double(hsiv.get(i).getScore())<<"\n";
147
+ if ( iUpper!=MapType::end() && &hsiv.getMin().second.getRef() == &iUpper->second ) {
148
+ iUpper++;
149
+ if ( iUpper!=MapType::end() ) {
150
+ hsiv.set(iNext).first = FIRST_INDEX_TO_CHECK;
151
+ hsiv.set(iNext).second = SafePtr<const NV> ( iUpper->second );
152
+ typename V::ElementType d = v.getMarginalDistance ( hsiv.get(iNext).first, hsiv.get(iNext).second.getRef() );
153
+ hsiv.set(iNext).setScore() = d;
154
+ ////int j =
155
+ hsiv.fixDecr(iNext);
156
+ ////cerr<<" adding ln"<<&hsiv.get(j).second.getRef()<<" marg-dist="<<d<<" new-score="<<double(hsiv.get(j).getScore())<<" new-pos="<<j<<"\n";
157
+ iNext++;
158
+ ////for(int i=0;i<iNext;i++) cerr<<" "<<i<<": ln"<<hsiv.get(i).second.getRef().lineNum.toInt()<<" new-score="<<double(hsiv.get(i).getScore())<<"\n";
159
+ }
160
+ }
161
+ if ( iLower!=MapType::begin() && &hsiv.getMin().second.getRef() == &iLower->second ) {
162
+ iLower--;
163
+ hsiv.set(iNext).first = FIRST_INDEX_TO_CHECK;
164
+ hsiv.set(iNext).second = SafePtr<const NV> ( iLower->second );
165
+ typename V::ElementType d = v.getMarginalDistance ( hsiv.get(iNext).first, hsiv.get(iNext).second.getRef() );
166
+ hsiv.set(iNext).setScore() = d;
167
+ ////int j =
168
+ hsiv.fixDecr(iNext);
169
+ ////cerr<<" adding ln"<<&hsiv.get(j).second.getRef()<<" marg-dist="<<d<<" new-score="<<double(hsiv.get(j).getScore())<<" new-pos="<<j<<"\n";
170
+ iNext++;
171
+ ////for(int i=0;i<iNext;i++) cerr<<" "<<i<<": ln"<<hsiv.get(i).second.getRef().lineNum.toInt()<<" new-score="<<double(hsiv.get(i).getScore())<<"\n";
172
+ }
173
+ }
174
+ return pair<typename V::ElementType,int> ( hsiv.getMin().getScore(), hsiv.getMin().second.getRef().getNumber() );
175
+ }
176
+
177
+
178
+ ////////////////////////////////////////////////////////////////////////////////
179
+
180
+ #endif //_NL_ARCHETYPESET_
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-beam.h ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ///////////////////////////////////////////////////////////////////////////////
2
+ // //
3
+ // This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
4
+ // //
5
+ // ModelBlocks is free software: you can redistribute it and/or modify //
6
+ // it under the terms of the GNU General Public License as published by //
7
+ // the Free Software Foundation, either version 3 of the License, or //
8
+ // (at your option) any later version. //
9
+ // //
10
+ // ModelBlocks is distributed in the hope that it will be useful, //
11
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of //
12
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
13
+ // GNU General Public License for more details. //
14
+ // //
15
+ // You should have received a copy of the GNU General Public License //
16
+ // along with ModelBlocks. If not, see <http://www.gnu.org/licenses/>. //
17
+ // //
18
+ // ModelBlocks developers designate this particular file as subject to //
19
+ // the "Moses" exception as provided by ModelBlocks developers in //
20
+ // the LICENSE file that accompanies this code. //
21
+ // //
22
+ ///////////////////////////////////////////////////////////////////////////////
23
+
24
+ #ifndef _NL_BEAM__
25
+ #define _NL_BEAM__
26
+
27
+ #include "nl-heap.h"
28
+ #include "nl-hash.h"
29
+ //#include <boost/thread/thread.hpp>
30
+ //#include <boost/thread/mutex.hpp>
31
+ #include <tr1/unordered_map>
32
+
33
+ ////////////////////////////////////////////////////////////////////////////////
34
+
35
+ /*
36
+ template <class R>
37
+ class SafePtr {
38
+ private:
39
+ R* pr;
40
+ static R rDummy;
41
+ public:
42
+ SafePtr<R> ( ) : pr(NULL) { }
43
+ SafePtr<R> ( R& r ) : pr(&r) { }
44
+ bool operator== ( const SafePtr<R>& spr ) const { return(pr==spr.pr); }
45
+ bool operator!= ( const SafePtr<R>& spr ) const { return(!(pr==spr.pr)); }
46
+ R& set ( ) { assert(pr); return (pr!=NULL) ? *pr : rDummy; }
47
+ const R& get ( ) const { return (pr!=NULL) ? *pr : rDummy; }
48
+ };
49
+ template <class R>
50
+ R SafePtr<R>::rDummy = R();
51
+
52
+ template <class S, class R>
53
+ class ScoredPtr : public SafePtr<R> {
54
+ public:
55
+ S scr;
56
+ ScoredPtr<S,R> ( ) : SafePtr<R>() , scr() { }
57
+ ScoredPtr<S,R> ( S s, R& r ) : SafePtr<R>(r), scr(s) { }
58
+ S& setScore() { return scr; }
59
+ S getScore() const { return scr; }
60
+ };
61
+ */
62
+
63
+ ////////////////////////////////////////////////////////////////////////////////
64
+
65
// Iterator into a container of type C, bundled with a score of type S.
// Publicly derives from C::iterator, so an instance can be dereferenced,
// compared, and handed to C's member functions exactly like the iterator
// it wraps.
template <class S, class C>
class ScoredIter : public C::iterator {
 private:
  S s;   // score attached to the wrapped iterator
 public:
  // Default: value-initialized iterator paired with a default score.
  // Relies on the iterator's own default constructor instead of the
  // implementation-specific two-pointer form C::iterator(0,0), which is
  // not part of any container's documented interface.
  ScoredIter ( ) : C::iterator(), s() { }
  // Pair score s1 with a copy of iterator i1.
  ScoredIter ( S s1, const typename C::iterator& i1 ) : C::iterator(i1), s(s1) { }
  // Mutable access to the score.
  S& setScore() { return s; }
  // Read-only access to the score (returned by value).
  S getScore() const { return s; }
};
77
+ //template <class S, class C> C ScoredIter<S,C>::cDummy;
78
+
79
+ ////////////////////////////////////////////////////////////////////////////////
80
+
81
+ template <class S,class K,class D>
82
+ class Beam {
83
+ public:
84
+ typedef std::pair<int,D> ID;
85
+ typedef std::pair<K,std::pair<int,D> > KID;
86
+ typedef std::tr1::unordered_multimap<K,ID,SimpleHashFn<K>,SimpleHashEqual<K> > BeamMap;
87
+ typedef MinHeap<ScoredIter<S,BeamMap> > BeamHeap;
88
+ private:
89
+ BeamMap mkid;
90
+ BeamHeap hspkid;
91
+ public:
92
+ // Constructor methods...
93
+ Beam<S,K,D> ( int i ) : mkid(2*i), hspkid(i) { for(int j=0;j<i;j++)set(j,K(),D(),S()); }
94
+ // Specification methods...
95
+ bool tryAdd ( const K&, const D&, const S& ) ;
96
+ void set ( int i, const K& k, const D& d, const S& s ) { hspkid.set(i) = ScoredIter<S,BeamMap>(s,mkid.insert(KID(k,ID(i,d)))); }
97
+ // Extraction methods...
98
+ const ScoredIter<S,BeamMap>& getMin ( ) const { return hspkid.getMin(); }
99
+ const ScoredIter<S,BeamMap>& get ( int i ) const { return hspkid.get(i); }
100
+ void sort ( SafeArray1D<Id<int>,std::pair<std::pair<K,D>,S> >& ) ;
101
+ void write(FILE *pf){
102
+ /* for (typename BeamMap::const_iterator i = mkid.begin(); i != mkid.end(); i++){
103
+ i->first.write(pf);
104
+ fprintf(pf, " %d ", i->second.first);
105
+ // i->second.second.write(pf);
106
+ fprintf(pf, "\n");
107
+ }
108
+ */
109
+ for(int i=0; i<hspkid.getSize(); i++){
110
+ fprintf(pf, "%d ", hspkid.get(i).getScore().toInt());
111
+ hspkid.get(i)->first.write(pf);
112
+ fprintf(pf, "\n");
113
+ }
114
+ }
115
+ };
116
+
117
+ template <class S,class K,class D>
118
+ bool Beam<S,K,D>::tryAdd ( const K& k, const D& d, const S& s ) {
119
+ // If score good enough to get into beam...
120
+ if ( s > hspkid.getMin().getScore() ) {
121
+ typename BeamMap::const_iterator i = mkid.find(k);
122
+ // If key in beam already...
123
+ if ( i != mkid.end() ) {
124
+ // If same key in beam now has better score...
125
+ if ( s > hspkid.get(i->second.first).getScore() ) {
126
+ // Update score (and data associated with that score)...
127
+ hspkid.set(i->second.first).setScore() = s;
128
+ hspkid.set(i->second.first)->second.second = d;
129
+ // Update heap...
130
+ int iStart = i->second.first; int iDeeper = hspkid.fixIncr(iStart);
131
+ // Fix pointers in hash...
132
+ for ( int j = iDeeper+1; j>=iStart+1; j/=2 ) hspkid.set(j-1)->second.first = j-1;
133
+ }
134
+ }
135
+ // If x not in beam already, add...
136
+ else {
137
+ // Remove min from map (via pointer in heap)...
138
+ mkid.erase ( hspkid.getMin() );
139
+ // Insert new entry at min...
140
+ set(0,k,d,s);
141
+ // Update heap...
142
+ int iStart = 0; int iDeeper = hspkid.fixIncr(iStart);
143
+ // Fix pointers in hash...
144
+ for ( int j = iDeeper+1; j>=iStart+1; j/=2 ) hspkid.set(j-1)->second.first = j-1;
145
+ }
146
+ }
147
+ return ( LogProb() != hspkid.getMin().getScore() ); // true = beam full, false = beam still has gaps
148
+ }
149
+
150
+ template <class S,class K,class D>
151
+ void Beam<S,K,D>::sort ( SafeArray1D<Id<int>,std::pair<std::pair<K,D>,S> >& akdsOut ) {
152
+ for ( int i=0; i<hspkid.getSize(); i++ ) {
153
+ akdsOut.set(hspkid.getSize()-i-1).first.first = hspkid.getMin()->first; // copy min key to output key.
154
+ akdsOut.set(hspkid.getSize()-i-1).first.second = hspkid.getMin()->second.second; // copy min dat to output dat.
155
+ akdsOut.set(hspkid.getSize()-i-1).second = hspkid.getMin().getScore(); // copy min scr to output scr.
156
+ hspkid.setMin().setScore() = LogProb(1); // get min out of the way.
157
+ hspkid.fixIncr(0); // repair heap.
158
+ }
159
+ }
160
+
161
+ ////////////////////////////////////////////////////////////////////////////////
162
+
163
+
164
+ #endif //_NL_BEAM__
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-const.h ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ///////////////////////////////////////////////////////////////////////////////
2
+ // //
3
+ // This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
4
+ // //
5
+ // ModelBlocks is free software: you can redistribute it and/or modify //
6
+ // it under the terms of the GNU General Public License as published by //
7
+ // the Free Software Foundation, either version 3 of the License, or //
8
+ // (at your option) any later version. //
9
+ // //
10
+ // ModelBlocks is distributed in the hope that it will be useful, //
11
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of //
12
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
13
+ // GNU General Public License for more details. //
14
+ // //
15
+ // You should have received a copy of the GNU General Public License //
16
+ // along with ModelBlocks. If not, see <http://www.gnu.org/licenses/>. //
17
+ // //
18
+ // ModelBlocks developers designate this particular file as subject to //
19
+ // the "Moses" exception as provided by ModelBlocks developers in //
20
+ // the LICENSE file that accompanies this code. //
21
+ // //
22
+ ///////////////////////////////////////////////////////////////////////////////
23
+
24
+ #ifndef __NL_CONST_H_
25
+ #define __NL_CONST_H_
26
+
27
+ //#include <getopt.h>
28
+
29
+ ///////////////////////////////////////////////////////////////////////////////
30
+ // type defs...
31
+
32
+ typedef short int16 ;
33
+ typedef int16 Sample ;
34
+ //typedef int Mag ;
35
+ //typedef int Gam ;
36
+
37
+ ///////////////////////////////////////////////////////////////////////////////
38
+ // Misc consts...
39
+
40
// Larger / smaller of two ints.
// Declared inline because this header defines them: a non-inline definition
// would be emitted in every translation unit that includes the header and
// fail at link time with duplicate symbols.
inline int max(int i,int j) {return (i>j)?i:j;}
inline int min(int i,int j) {return (i<j)?i:j;}
42
+
43
// Bitwise rotation of n by i positions (left / right).
//
// The complementary shift must be measured in bits, not bytes, so the width
// is sizeof(size_t)*8 (assumes 8-bit bytes).  The rotation count is reduced
// modulo that width, and a zero count is returned unchanged, because
// shifting a value by its full width is undefined behaviour.
inline size_t rotLeft (const size_t& n, const size_t& i) {
  const size_t numBits = sizeof(size_t) * 8;
  const size_t shift   = i % numBits;
  return (shift == 0) ? n : ((n << shift) | (n >> (numBits - shift)));
}
inline size_t rotRight(const size_t& n, const size_t& i) {
  const size_t numBits = sizeof(size_t) * 8;
  const size_t shift   = i % numBits;
  return (shift == 0) ? n : ((n >> shift) | (n << (numBits - shift)));
}
45
+
46
+ //inline float abs ( float a ) { return (a>=0)?a:-a; }
47
+ /*
48
+
49
+ ///////////////////////////////////////////////////////////////////////////////
50
+ // Basic phone recognition consts...
51
+ static const int NUM_SAMPLES_PER_FRAME = 512;
52
+ #ifdef OLD_Q
53
+ static const int FRAME_RATE_IN_SAMPLES = 160; //// 80
54
+ #else
55
+ static const int FRAME_RATE_IN_SAMPLES = 256; //// 160; //// 80
56
+ #endif
57
+
58
+ static const int LOG_NUM_FREQUENCIES = 8;
59
+ static const int LOG_NUM_QUEFRENCIES = LOG_NUM_FREQUENCIES;
60
+ static const int NUM_FREQUENCIES = 1<<LOG_NUM_FREQUENCIES;
61
+ static const int NUM_QUEFRENCIES = 1<<LOG_NUM_QUEFRENCIES;
62
+
63
+ ///////////////////////////////////////////////////////////////////////////////
64
+ // Output format globals
65
+ static bool OUTPUT_QUIET = false;
66
+
67
+ ///////////////////////////////////////////////////////////////////////////////
68
+ // H/O consts...
69
+
70
+ static int LOG_MAX_SIGNS = 13; // NOTE: bit limit: LOG_MAX_SIGNS + 3*LOG_MAX_ENTS < 31
71
+ static int MAX_SIGNS = 1<<LOG_MAX_SIGNS;
72
+ static int MAX_IVS = 100;
73
+
74
+ ///////////////////////////////////////////////////////////////////////////////
75
+ // H sign recognition consts...
76
+
77
+ static double INSERT_PENALTY = 1.0; // MULTIPLICATIVE
78
+ static int MAX_FANOUT = 150;
79
+ static const int MAX_BOOLS = 2;
80
+ static const int MAX_TRUTHVALS = 3;
81
+
82
+ ///////////////////////////////////////////////////////////////////////////////
83
+ // H sem recognition consts...
84
+
85
+ static int LOG_MAX_ENTS = 6;
86
+ static int MAX_ENTS = 1<<LOG_MAX_ENTS;
87
+ static int MAX_CONTEXTS = 100;
88
+ static int MAX_RELNS = 100;
89
+ static int MAX_CATS = 1000;
90
+
91
+ ///////////////////////////////////////////////////////////////////////////////
92
+ // Reader consts...
93
+
94
+ static int MAX_READER_FIELDS = 50; //62442; //20;
95
+ static int LENGTH_READER_FIELDS = 1024; //512; //256;
96
+
97
+ ///////////////////////////////////////////////////////////////////////////////
98
+ // HMM consts...
99
+
100
+ //static const int BEAM_WIDTH = 4095;
101
+ static int BEAM_WIDTH = 63; //255;
102
+ //static const int BEAM_WIDTH = 1023;
103
+
104
+ ///////////////////////////////////////////////////////////////////////////////
105
+
106
+ static const int NUM_MFCC_FILTERS = 40;
107
+ static const int NUM_CEPSTRUM = 13;
108
+ static const int WEIGHT_SIZE = 8;
109
+ static const int MFCC_SIZE = 3 * NUM_CEPSTRUM;
110
+ static const float MIN_FREQUENCY = 0; //130.0;
111
+ static const float MAX_FREQUENCY = 8000.0; //Max allowed freq in signal is 16000Hz
112
+ static const int MEAN_SIZE = (WEIGHT_SIZE * MFCC_SIZE);
113
+ //Use a diagonal matrix for now
114
+ //static const int COVARIANCE_SIZE = (MEAN_SIZE * MFCC_SIZE);
115
+ static const int COVARIANCE_SIZE = MEAN_SIZE;
116
+ static const int MAX_NUM_FRAMES = 10000;
117
+ static const float PREEMPASIZE_FACTOR = 0.97;
118
+ static const int NUM_FFT_POINTS = NUM_SAMPLES_PER_FRAME;
119
+ static const int SAMPLING_RATE = 16000;
120
+
121
+ static const bool DEBUG_MODE = false;
122
+
123
+ */
124
+
125
+ #endif /*__NL_CONST_H_*/
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-crf.h ADDED
@@ -0,0 +1,359 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ///////////////////////////////////////////////////////////////////////////////
2
+ // //
3
+ // This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
4
+ // //
5
+ // ModelBlocks is free software: you can redistribute it and/or modify //
6
+ // it under the terms of the GNU General Public License as published by //
7
+ // the Free Software Foundation, either version 3 of the License, or //
8
+ // (at your option) any later version. //
9
+ // //
10
+ // ModelBlocks is distributed in the hope that it will be useful, //
11
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of //
12
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
13
+ // GNU General Public License for more details. //
14
+ // //
15
+ // You should have received a copy of the GNU General Public License //
16
+ // along with ModelBlocks. If not, see <http://www.gnu.org/licenses/>. //
17
+ // //
18
+ // ModelBlocks developers designate this particular file as subject to //
19
+ // the "Moses" exception as provided by ModelBlocks developers in //
20
+ // the LICENSE file that accompanies this code. //
21
+ // //
22
+ ///////////////////////////////////////////////////////////////////////////////
23
+
24
+ #ifndef _NL_CRF__
25
+ #define _NL_CRF__
26
+
27
+ #include "nl-safeids.h"
28
+ #include "nl-probmodel.h"
29
+ #include <cassert>
30
+ #include <math.h>
31
+
32
+ ////////////////////////////////////////////////////////////////////////////////
33
+ ////////////////////////////////////////////////////////////////////////////////
34
+ //
35
+ // CRF3DModeledRV<Y,X1,X2>
36
+ //
37
+ ////////////////////////////////////////////////////////////////////////////////
38
+
39
+ template <class Y,class X1,class X2>
40
+ class CRF3DModeledRV : public Y {
41
+
42
+ private:
43
+
44
+ // Static data members...
45
+ static bool bModel; // whether model defined yet
46
+ static int cardGlb; // global dependencies (used in all potentials)
47
+ static int cardOff; // offset positions in site var sequence
48
+ static int cardSh; // clique shapes at each offset
49
+ static int cardCnd; // possible condition clique configs incl non-site vars in high bits
50
+ static int bitsVal; // size in bits of val part of clique config
51
+ static int bitsValSite; // size in bits of each site var in val clique config
52
+ static SafeArray5D<Id<int>,int,int,int,int,float> aaaaaPotentials; // the model
53
+ /* static SafeArray3D<int> aaaCnds; // calc features only once per frame */
54
+
55
+ public:
56
+
57
+ // Static extraction methods...
58
+ static const float& getPotential ( Id<int> glb, int off, int sh, int configCnd, int configVal )
59
+ { assert(bModel); return aaaaaPotentials.set(glb,off,sh,configCnd,configVal); }
60
+
61
+ // Static specification methods...
62
+ static void init ( int g, int o, int s, int c, int v, int b )
63
+ { cardGlb=g; cardOff=o; cardSh=s; cardCnd=c; bitsVal=v; bitsValSite=b; }
64
+ static float& setPotential ( Id<int> glb, int off, int sh, int configCnd, int configVal )
65
+ { if(!bModel){aaaaaPotentials.init(cardGlb,cardOff,cardSh,cardCnd,1<<bitsVal,1.0); bModel=true;}
66
+ return aaaaaPotentials.set(glb,off,sh,configCnd,configVal); }
67
+ static void updateObservCliques ( const X1&, const X2& ) ;
68
+
69
+ // Static input / output methods...
70
+ static bool readModelFields ( char*[], int ) ;
71
+
72
+ // Extraction methods...
73
+ Prob getProb ( const X1&, const X2& ) const ;
74
+
75
+ // Input / output methods...
76
+ void writeObservCliqueConfigs ( FILE*, int, const char*, const X1&, const X2&, bool ) const ;
77
+ };
78
+
79
+ ////////////////////////////////////////////////////////////////////////////////
80
+
81
+ template <class Y,class X1,class X2> bool CRF3DModeledRV<Y,X1,X2>::bModel = false;
82
+ template <class Y,class X1,class X2> int CRF3DModeledRV<Y,X1,X2>::cardGlb = 0;
83
+ template <class Y,class X1,class X2> int CRF3DModeledRV<Y,X1,X2>::cardOff = 0;
84
+ template <class Y,class X1,class X2> int CRF3DModeledRV<Y,X1,X2>::cardSh = 0;
85
+ template <class Y,class X1,class X2> int CRF3DModeledRV<Y,X1,X2>::cardCnd = 0;
86
+ template <class Y,class X1,class X2> int CRF3DModeledRV<Y,X1,X2>::bitsVal = 0;
87
+ template <class Y,class X1,class X2> int CRF3DModeledRV<Y,X1,X2>::bitsValSite = 0;
88
+ template <class Y,class X1,class X2> SafeArray5D<Id<int>,int,int,int,int,float> CRF3DModeledRV<Y,X1,X2>::aaaaaPotentials;
89
+ /* template <class Y,class X1,class X2> SafeArray3D<int> CRF3DModeledRV<Y,X1,X2>::aaaCnds; */
90
+
91
+ ////////////////////////////////////////////////////////////////////////////////
92
+
93
+ template <class Y,class X1,class X2>
94
+ Prob CRF3DModeledRV<Y,X1,X2>::getProb( const X1& x1, const X2& x2 ) const {
95
+
96
+ SafeArray2D<int,int,int> aaCnds ( cardOff, cardSh ) ;
97
+ SafeArray2D<int,int,double> aaTrell ( cardOff, 1<<bitsVal, 0.0 ) ;
98
+ double prob = 1.0;
99
+
100
+ // For each offset...
101
+ for ( int off=0; off<cardOff; off++ )
102
+ // For each shape...
103
+ for ( int sh=0; sh<cardSh; sh++ )
104
+ // Update clique config for condition...
105
+ aaCnds.set(off,sh) = Y::getCliqueConfigCnd ( x1, x2, off, sh ) ;
106
+
107
+ // For each offset...
108
+ for ( int off=0; off<cardOff; off++ ) {
109
+ // For each shape...
110
+ for ( int sh=0; sh<cardSh; sh++ )
111
+ // Multiply phi for feature (that is, exp lambda) into numerator...
112
+ prob *= getPotential(Y::getGlobalDependency(x1,x2),off,sh,
113
+ aaCnds.get(off,sh),
114
+ Y::getCliqueConfigVal(off,sh));
115
+
116
+ // If first column in trellis...
117
+ if ( 0==off ) {
118
+ // For each trellis value...
119
+ for ( int configVal=0; configVal<(1<<bitsVal); configVal++ ) {
120
+ // Add weight of each shape at current offset...
121
+ float prod=1.0;
122
+ for ( int sh=0; sh<cardSh; sh++ )
123
+ prod *= getPotential(Y::getGlobalDependency(x1,x2),off,sh,
124
+ aaCnds.get(off,sh),
125
+ configVal) ;
126
+ aaTrell.set(off,configVal) = prod ;
127
+ }
128
+ // If subsequent column in trellis...
129
+ } else {
130
+ // For each trellis transition (overlap = all but one)...
131
+ for ( int configRghtValSite=0; configRghtValSite<(1<<bitsValSite); configRghtValSite++ )
132
+ for ( int configValOverlap=0; configValOverlap<(1<<(bitsVal-bitsValSite)); configValOverlap++ ) {
133
+ int configRghtVal = (configValOverlap<<bitsValSite)+configRghtValSite;
134
+ // For each possible preceding trellis node...
135
+ for ( int configLeftValSite=0; configLeftValSite<(1<<bitsValSite); configLeftValSite++ ) {
136
+ int configLeftVal = (configLeftValSite<<(bitsVal-bitsValSite))+configValOverlap;
137
+ // Add product of result and previous trellis cell to current trellis cell...
138
+ aaTrell.set(off,configRghtVal) += aaTrell.get(off-1,configLeftVal) ;
139
+ }
140
+ // Multiply weight of each shape...
141
+ float prod=1.0;
142
+ for ( int sh=0; sh<cardSh; sh++ )
143
+ prod *= getPotential(Y::getGlobalDependency(x1,x2),off,sh,
144
+ aaCnds.get(off,sh),
145
+ configRghtVal);
146
+ aaTrell.set(off,configRghtVal) *= prod;
147
+ }
148
+ }
149
+ } // END EACH OFFSET
150
+
151
+ // Calc total prob mass: sum of all possible forward scores in trellis...
152
+ double probZ = 0.0;
153
+ for ( int i=0; i<(1<<bitsVal); i++ )
154
+ probZ += aaTrell.get(cardOff-1,i);
155
+ // Normalize prob by total prob mass...
156
+ return prob/probZ;
157
+ }
158
+
159
+ ////////////////////////////////////////////////////////////////////////////////
160
+
161
+ template <class Y,class X1,class X2>
162
+ bool CRF3DModeledRV<Y,X1,X2>::readModelFields ( char* aps[], int numFields ) {
163
+ if ( 7==numFields )
164
+ setPotential ( X1(string(aps[1])), // globals
165
+ atoi(aps[2]), // offsets
166
+ atoi(aps[3]), // shapes
167
+ atoi(aps[4]), // cnds
168
+ atoi(aps[5]) ) = exp(atof(aps[6])) ; // vals
169
+ else return false;
170
+ return true;
171
+ }
172
+
173
+ ////////////////////////////////////////////////////////////////////////////////
174
+
175
+ template <class Y,class X1,class X2>
176
+ void CRF3DModeledRV<Y,X1,X2>::writeObservCliqueConfigs ( FILE* pf, int frame, const char* psMdl,
177
+ const X1& x1, const X2& x2, bool bObsVal ) const {
178
+ fprintf ( pf, "%04d> %s ", frame, psMdl );
179
+ // For each shape (feature slope)...
180
+ for ( int sh=0; sh<cardSh; sh++ ) {
181
+ // Print clique config condition at each offset...
182
+ for ( int off=0; off<cardOff; off++ )
183
+ fprintf ( pf, "%c", intToTetraHex(Y::getCliqueConfigCnd(x1,x2,off,sh)) );
184
+ if (sh<cardSh-1) printf(","); // commas between shapes
185
+ }
186
+ printf(" : "); // cond/val delimiter
187
+ // Print clique config value at each offset...
188
+ if ( bObsVal )
189
+ for ( int off=0; off<cardOff; off++ )
190
+ fprintf ( pf, "%c", intToTetraHex(Y::getCliqueConfigVal(off,0)) );
191
+ else fprintf ( pf, "_" ) ;
192
+ printf("\n");
193
+ }
194
+
195
+ ////////////////////////////////////////////////////////////////////////////////
196
+ ////////////////////////////////////////////////////////////////////////////////
197
+ //
198
+ // CRF4DModeledRV<Y,X1,X2,X3>
199
+ //
200
+ ////////////////////////////////////////////////////////////////////////////////
201
+
202
+ template <class Y,class X1,class X2,class X3>
203
+ class CRF4DModeledRV : public Y {
204
+
205
+ private:
206
+
207
+ // Static data members...
208
+ static bool bModel; // whether model defined yet
209
+ static int cardGlb; // global dependencies (used in all potentials)
210
+ static int cardOff; // offset positions in site var sequence
211
+ static int cardSh; // clique shapes at each offset
212
+ static int cardCnd; // possible condition clique configs incl non-site vars in high bits
213
+ static int bitsVal; // size in bits of val part of clique config
214
+ static int bitsValSite; // size in bits of each site var in val clique config
215
+ static SafeArray5D<Id<int>,int,int,int,int,float> aaaaaPotentials; // the model
216
+ /* static SafeArray3D<int> aaaCnds; // calc features only once per frame */
217
+
218
+ public:
219
+
220
+ // Static extraction methods...
221
+ static const float& getPotential ( Id<int> glb, int off, int sh, int configCnd, int configVal )
222
+ { assert(bModel); return aaaaaPotentials.set(glb,off,sh,configCnd,configVal); }
223
+
224
+ // Static specification methods...
225
+ static void init ( int g, int o, int s, int c, int v, int b )
226
+ { cardGlb=g; cardOff=o; cardSh=s; cardCnd=c; bitsVal=v; bitsValSite=b; }
227
+ static float& setPotential ( Id<int> glb, int off, int sh, int configCnd, int configVal )
228
+ { if(!bModel){aaaaaPotentials.init(cardGlb,cardOff,cardSh,cardCnd,1<<bitsVal,1.0); bModel=true;}
229
+ return aaaaaPotentials.set(glb,off,sh,configCnd,configVal); }
230
+
231
+ // Static input / output methods...
232
+ static bool readModelFields ( char*[], int ) ;
233
+
234
+ // Extraction methods...
235
+ Prob getProb ( const X1&, const X2&, const X3& ) const ;
236
+
237
+ // Input / output methods...
238
+ void writeObservCliqueConfigs ( FILE*, int, const char*, const X1&, const X2&, const X3&, bool ) const ;
239
+ };
240
+
241
+ ////////////////////////////////////////////////////////////////////////////////
242
+
243
+ template <class Y,class X1,class X2,class X3> bool CRF4DModeledRV<Y,X1,X2,X3>::bModel = false;
244
+ template <class Y,class X1,class X2,class X3> int CRF4DModeledRV<Y,X1,X2,X3>::cardGlb = 0;
245
+ template <class Y,class X1,class X2,class X3> int CRF4DModeledRV<Y,X1,X2,X3>::cardOff = 0;
246
+ template <class Y,class X1,class X2,class X3> int CRF4DModeledRV<Y,X1,X2,X3>::cardSh = 0;
247
+ template <class Y,class X1,class X2,class X3> int CRF4DModeledRV<Y,X1,X2,X3>::cardCnd = 0;
248
+ template <class Y,class X1,class X2,class X3> int CRF4DModeledRV<Y,X1,X2,X3>::bitsVal = 0;
249
+ template <class Y,class X1,class X2,class X3> int CRF4DModeledRV<Y,X1,X2,X3>::bitsValSite = 0;
250
+ template <class Y,class X1,class X2,class X3> SafeArray5D<Id<int>,int,int,int,int,float>
251
+ CRF4DModeledRV<Y,X1,X2,X3>::aaaaaPotentials;
252
+ /* template <class Y,class X1,class X2> SafeArray3D<int> CRF4DModeledRV<Y,X1,X2>::aaaCnds; */
253
+
254
+ ////////////////////////////////////////////////////////////////////////////////
255
+
256
+ template <class Y,class X1,class X2,class X3>
257
+ Prob CRF4DModeledRV<Y,X1,X2,X3>::getProb( const X1& x1, const X2& x2, const X3& x3 ) const {
258
+
259
+ SafeArray2D<int,int,int> aaCnds ( cardOff, cardSh ) ;
260
+ SafeArray2D<int,int,double> aaTrell ( cardOff, 1<<bitsVal, 0.0 ) ;
261
+ double prob = 1.0;
262
+
263
+ // For each offset...
264
+ for ( int off=0; off<cardOff; off++ )
265
+ // For each shape...
266
+ for ( int sh=0; sh<cardSh; sh++ )
267
+ // Update clique config for condition...
268
+ aaCnds.set(off,sh) = Y::getCliqueConfigCnd ( x1, x2, x3, off, sh ) ;
269
+
270
+ // For each offset...
271
+ for ( int off=0; off<cardOff; off++ ) {
272
+ // For each shape...
273
+ for ( int sh=0; sh<cardSh; sh++ )
274
+ // Multiply phi for feature (that is, exp lambda) into numerator...
275
+ prob *= getPotential(Y::getGlobalDependency(x1,x2,x3),off,sh,
276
+ aaCnds.get(off,sh),
277
+ Y::getCliqueConfigVal(off,sh));
278
+
279
+ // If first column in trellis...
280
+ if ( 0==off ) {
281
+ // For each trellis value...
282
+ for ( int configVal=0; configVal<(1<<bitsVal); configVal++ ) {
283
+ // Add weight of each shape at current offset...
284
+ float prod=1.0;
285
+ for ( int sh=0; sh<cardSh; sh++ )
286
+ prod *= getPotential(Y::getGlobalDependency(x1,x2,x3),off,sh,
287
+ aaCnds.get(off,sh),
288
+ configVal) ;
289
+ aaTrell.set(off,configVal) = prod ;
290
+ }
291
+ // If subsequent column in trellis...
292
+ } else {
293
+ // For each trellis transition (overlap = all but one)...
294
+ for ( int configRghtValSite=0; configRghtValSite<(1<<bitsValSite); configRghtValSite++ )
295
+ for ( int configValOverlap=0; configValOverlap<(1<<(bitsVal-bitsValSite)); configValOverlap++ ) {
296
+ int configRghtVal = (configValOverlap<<bitsValSite)+configRghtValSite;
297
+ // For each possible preceding trellis node...
298
+ for ( int configLeftValSite=0; configLeftValSite<(1<<bitsValSite); configLeftValSite++ ) {
299
+ int configLeftVal = (configLeftValSite<<(bitsVal-bitsValSite))+configValOverlap;
300
+ // Add product of result and previous trellis cell to current trellis cell...
301
+ aaTrell.set(off,configRghtVal) += aaTrell.get(off-1,configLeftVal) ;
302
+ }
303
+ // Multiply weight of each shape...
304
+ float prod=1.0;
305
+ for ( int sh=0; sh<cardSh; sh++ )
306
+ prod *= getPotential(Y::getGlobalDependency(x1,x2,x3),off,sh,
307
+ aaCnds.get(off,sh),
308
+ configRghtVal);
309
+ aaTrell.set(off,configRghtVal) *= prod;
310
+ }
311
+ }
312
+ } // END EACH OFFSET
313
+
314
+ // Calc total prob mass: sum of all possible forward scores in trellis...
315
+ double probZ = 0.0;
316
+ for ( int i=0; i<(1<<bitsVal); i++ )
317
+ probZ += aaTrell.get(cardOff-1,i);
318
+ // Normalize prob by total prob mass...
319
+ return prob/probZ;
320
+ }
321
+
322
+ ////////////////////////////////////////////////////////////////////////////////
323
+
324
+ template <class Y,class X1,class X2,class X3>
325
+ bool CRF4DModeledRV<Y,X1,X2,X3>::readModelFields ( char* aps[], int numFields ) {
326
+ if ( 7==numFields )
327
+ setPotential ( X1(string(aps[1])), // globals
328
+ atoi(aps[2]), // offsets
329
+ atoi(aps[3]), // shapes
330
+ atoi(aps[4]), // cnds
331
+ atoi(aps[5]) ) = exp(atof(aps[6])) ; // vals
332
+ else return false;
333
+ return true;
334
+ }
335
+
336
+ ////////////////////////////////////////////////////////////////////////////////
337
+
338
+ template <class Y,class X1,class X2, class X3>
339
+ void CRF4DModeledRV<Y,X1,X2,X3>::writeObservCliqueConfigs ( FILE* pf, int frame, const char* psMdl,
340
+ const X1& x1, const X2& x2,
341
+ const X3& x3, bool bObsVal ) const {
342
+ fprintf ( pf, "%04d> %s ", frame, psMdl );
343
+ // For each shape (feature slope)...
344
+ for ( int sh=0; sh<cardSh; sh++ ) {
345
+ // Print clique config condition at each offset...
346
+ for ( int off=0; off<cardOff; off++ )
347
+ fprintf ( pf, "%c", intToTetraHex(Y::getCliqueConfigCnd(x1,x2,x3,off,sh)) );
348
+ if (sh<cardSh-1) printf(","); // commas between shapes
349
+ }
350
+ printf(" : "); // cond/val delimiter
351
+ // Print clique config value at each offset...
352
+ if ( bObsVal )
353
+ for ( int off=0; off<cardOff; off++ )
354
+ fprintf ( pf, "%c", intToTetraHex(Y::getCliqueConfigVal(off,0)) );
355
+ else fprintf ( pf, "_" ) ;
356
+ printf("\n");
357
+ }
358
+
359
+ #endif //_NL_CRF__
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-dtree-cont.h ADDED
@@ -0,0 +1,479 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ///////////////////////////////////////////////////////////////////////////////
2
+ // //
3
+ // This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
4
+ // //
5
+ // ModelBlocks is free software: you can redistribute it and/or modify //
6
+ // it under the terms of the GNU General Public License as published by //
7
+ // the Free Software Foundation, either version 3 of the License, or //
8
+ // (at your option) any later version. //
9
+ // //
10
+ // ModelBlocks is distributed in the hope that it will be useful, //
11
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of //
12
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
13
+ // GNU General Public License for more details. //
14
+ // //
15
+ // You should have received a copy of the GNU General Public License //
16
+ // along with ModelBlocks. If not, see <http://www.gnu.org/licenses/>. //
17
+ // //
18
+ // ModelBlocks developers designate this particular file as subject to //
19
+ // the "Moses" exception as provided by ModelBlocks developers in //
20
+ // the LICENSE file that accompanies this code. //
21
+ // //
22
+ ///////////////////////////////////////////////////////////////////////////////
23
+
24
+
25
+ #ifndef _NL_DTREE_CONTIN__
26
+ #define _NL_DTREE_CONTIN__
27
+
28
+ #include "nl-dtree.h"
29
+
30
+ typedef double Wt;
31
+
32
+ ////////////////////////////////////////////////////////////////////////////////
33
+ //
34
+ // Cont DTree Node
35
+ //
36
+ ////////////////////////////////////////////////////////////////////////////////
37
+
38
+ template<class Y, class P>
39
+ class ContDecisNode : public DecisNode<Y,P> {
40
+ private:
41
+ // Data members...
42
+ Wt wThreshold; // Threshold weight ("w_0")
43
+ map<A,Wt> awSeparator; // Hyperplane separator, weights on each attribute/dimension
44
+ Wt wSumSqr; // Sum of squares (parabolic) convolution coordinate weight
45
+
46
+ public:
47
+ // Constructor / destructor methods...
48
+ ContDecisNode ( ) : wThreshold(0.0), wSumSqr(0.0) { }
49
+
50
+ // Specification methods...
51
+ Wt& setWt ( ) { return wThreshold; }
52
+ Wt& setWt ( const A a ) { return (awSeparator.find(a)!=awSeparator.end()) ? awSeparator[a] : awSeparator[a]=0.0; }
53
+ Wt& setSsWt ( ) { return wSumSqr; }
54
+
55
+ // Extraction methods...
56
+ const Wt getWt ( ) const { return wThreshold; }
57
+ const Wt getWt ( const A a ) const { return ( (awSeparator.find(a)!=awSeparator.end()) ? awSeparator.find(a)->second : 0.0 ); }
58
+ const Wt getSsWt ( ) const { return wSumSqr; }
59
+ };
60
+
61
+ ////////////////////////////////////////////////////////////////////////////////
62
+ //
63
+ // ContDTree Model
64
+ //
65
+ ////////////////////////////////////////////////////////////////////////////////
66
+
67
+ template<class Y, class X, class P>
68
+ class ContDTree2DModel : public Generic2DModel<Y,X,P>, public Tree<ContDecisNode<Y,P> > {
69
+ public:
70
+ // Downcasts (safe b/c no new data)...
71
+ ContDTree2DModel<Y,X,P>& setLeft() { return static_cast<ContDTree2DModel<Y,X,P>&> ( Tree<ContDecisNode<Y,P> >::setLeft() ); }
72
+ ContDTree2DModel<Y,X,P>& setRight() { return static_cast<ContDTree2DModel<Y,X,P>&> ( Tree<ContDecisNode<Y,P> >::setRight() ); }
73
+ const ContDTree2DModel<Y,X,P>& getLeft() const { return static_cast<const ContDTree2DModel<Y,X,P>&> ( Tree<ContDecisNode<Y,P> >::getLeft() ); }
74
+ const ContDTree2DModel<Y,X,P>& getRight() const { return static_cast<const ContDTree2DModel<Y,X,P>&> ( Tree<ContDecisNode<Y,P> >::getRight() ); }
75
+ // Extraction methods...
76
+ const P getProb ( const Y y, const X& x ) const {
77
+ const Tree<ContDecisNode<Y,P> >* ptr = this;
78
+ while ( !ptr->isTerm() ) {
79
+ double sumsqr=0.0;
80
+ for(A a;a<X::getSize();a.setNext()) sumsqr += pow(x.get(a.toInt()),2.0) / X::getSize();
81
+ Wt wtdavg = -Tree<ContDecisNode<Y,P> >::getWt();
82
+ for(A a;a<X::getSize();a.setNext()) wtdavg += Tree<ContDecisNode<Y,P> >::getWt(a) * x.get(a.toInt());
83
+ wtdavg += Tree<ContDecisNode<Y,P> >::getSsWt() * sumsqr;
84
+ ptr = (wtdavg>0.0) ? &ptr->getRight() : &ptr->getLeft();
85
+ }
86
+ return ptr->getProb(y);
87
+ }
88
+ // Input / output methods...
89
+ bool readFields ( char*[], int ) ;
90
+ void writeFields ( FILE* pf, string sPref ) {
91
+ char psPath[1000] = "";
92
+ write ( pf, (sPref+"").c_str(), psPath, 0 );
93
+ }
94
+ void write ( FILE* pf, const char psPrefix[], char psPath[], int iEnd ) const {
95
+ if (Tree<ContDecisNode<Y,P> >::isTerm()) {
96
+ Y y;
97
+ psPath[iEnd]='\0';
98
+ for ( bool b=y.setFirst(); b; b=y.setNext() )
99
+ { fprintf(pf, "%s [%s] : ", psPrefix, psPath); y.write(pf); fprintf(pf, " = %f\n", (double)Tree<ContDecisNode<Y,P> >::getProb(y)); }
100
+ ////psPath[iEnd]='\0'; fprintf ( pf, "%s [%s] : 0 = %f\n", psPrefix, psPath, (double)Tree<ContDecisNode<Y,P> >::getProb("0") );
101
+ ////psPath[iEnd]='\0'; fprintf ( pf, "%s [%s] : 1 = %f\n", psPrefix, psPath, (double)Tree<ContDecisNode<Y,P> >::getProb("1") );
102
+ } else {
103
+ psPath[iEnd]='\0'; fprintf ( pf, "%s [%s] =", psPrefix, psPath );
104
+ fprintf ( pf, " %f", Tree<ContDecisNode<Y,P> >::getWt() );
105
+ for(A a;a<X::getSize();a.setNext()) fprintf ( pf, "_%f", Tree<ContDecisNode<Y,P> >::getWt(a.toInt()) );
106
+ fprintf ( pf, "_%f", Tree<ContDecisNode<Y,P> >::getSsWt() );
107
+ fprintf ( pf, "\n" );
108
+ psPath[iEnd]='0'; psPath[iEnd+1]='\0'; getLeft().write ( pf, psPrefix, psPath, iEnd+1 );
109
+ psPath[iEnd]='1'; psPath[iEnd+1]='\0'; getRight().write ( pf, psPrefix, psPath, iEnd+1 );
110
+ }
111
+ }
112
+ };
113
+
114
+ ////////////////////
115
+ template <class Y,class X, class P>
116
+ bool ContDTree2DModel<Y,X,P>::readFields ( char* aps[], int numFields ) {
117
+ if ( /*aps[0]==sId &&*/ (3==numFields || 4==numFields) ) {
118
+ //fprintf(stderr,"%s,%d\n",aps[3],numFields);
119
+ assert ( '['==aps[1][0] && ']'==aps[1][strlen(aps[1])-1] );
120
+
121
+ // Start at root...
122
+ Tree<ContDecisNode<Y,P> >* ptr = this;
123
+ assert(ptr);
124
+
125
+ // Find appropriate node, creating nodes as necessary...
126
+ for(int i=1; i<strlen(aps[1])-1; i++) {
127
+ assert ( '0'==aps[1][i] || '1'==aps[1][i] );
128
+ ptr = ( ('0'==aps[1][i]) ? &ptr->setLeft() : &ptr->setRight() ) ;
129
+ assert(ptr);
130
+ }
131
+
132
+ // Specify bit (at nonterminal) or distribution (at terminal)...
133
+ if ( 3==numFields) {
134
+ char* psT=NULL; Tree<ContDecisNode<Y,P> >::setWt() = atof(strtok_r(aps[2],"_",&psT)); ////atof(aps[2]);
135
+ for(A a;a<X::getSize();a.setNext()) Tree<ContDecisNode<Y,P> >::setWt(a) = atof(strtok_r(NULL,"_",&psT));
136
+ Tree<ContDecisNode<Y,P> >::setSsWt() = atof(strtok_r(NULL,"_",&psT)); }
137
+ // atof(aps[3+a.toInt()]); }
138
+ else if (4==numFields) ptr->setProb(aps[2]) = atof(aps[3]);
139
+ else assert(false);
140
+
141
+ } else return false;
142
+ return true;
143
+ }
144
+
145
+ ////////////////////////////////////////////////////////////////////////////////
146
+
147
+ template<class Y, class X1,class X2, class P>
148
+ class ContDTree3DModel : public Generic3DModel<Y,X1,X2,P> {
149
+ private:
150
+ // Data members...
151
+ string sId;
152
+ SimpleHash<X1,ContDTree2DModel<Y,X2,P> > aqt;
153
+ public:
154
+ // Constructor / destructor methods...
155
+ ContDTree3DModel ( ) { }
156
+ ContDTree3DModel ( const string& s ) { sId = s; }
157
+ // Specification methods...
158
+ ContDTree2DModel<Y,X2,P>& setTree ( const X1& x1 ) { return aqt.set(x1); }
159
+ // Extraction methods...
160
+ bool setFirst ( Y& y ) const { return y.setFirst(); }
161
+ bool setNext ( Y& y ) const { return y.setNext(); }
162
+ P getProb ( const Y y, const X1& x1, const X2& x2 ) const { return aqt.get(x1).getProb(y,x2); }
163
+ // Input / output methods...
164
+ bool readFields ( char*[], int ) ;
165
+ void writeFields ( FILE* pf, string sPref ) {
166
+ char psPath[1000] = "";
167
+ X1 x1;
168
+ for ( bool b=x1.setFirst(); b; b=x1.setNext() )
169
+ aqt.get(x1).write ( pf, (sPref + " " + x1.getString()).c_str(), psPath, 0 );
170
+ }
171
+ };
172
+
173
+ ////////////////////
174
+ template <class Y,class X1,class X2, class P>
175
+ bool ContDTree3DModel<Y,X1,X2,P>::readFields ( char* aps[], int numFields ) {
176
+ if ( /*aps[0]==sId &&*/ (4==numFields || 5==numFields) ) {
177
+ //fprintf(stderr,"%s,%d\n",aps[3],numFields);
178
+ assert ( '['==aps[2][0] && ']'==aps[2][strlen(aps[2])-1] );
179
+
180
+ // Start at root...
181
+ Tree<ContDecisNode<Y,P> >* ptr = &aqt.set(aps[1]);
182
+ assert(ptr);
183
+
184
+ // Find appropriate node, creating nodes as necessary...
185
+ for(int i=1; i<strlen(aps[2])-1; i++) {
186
+ assert ( '0'==aps[2][i] || '1'==aps[2][i] );
187
+ ptr = ( ('0'==aps[2][i]) ? &ptr->setLeft() : &ptr->setRight() ) ;
188
+ assert(ptr);
189
+ }
190
+
191
+ // Specify bit (at nonterminal) or distribution (at terminal)...
192
+ if ( 4==numFields) {
193
+ char* psT=NULL;
194
+ ptr->setWt() = atof(strtok_r(aps[3],"_",&psT)); ////atof(aps[3]);
195
+ for(A a;a<X2::getSize();a.setNext()) ptr->setWt(a) = atof(strtok_r(NULL,"_",&psT));
196
+ ptr->setSsWt() = atof(strtok_r(NULL,"_",&psT)); }
197
+ ////for(A a;a<X2::getSize();a.setNext()) ptr->setWt(a) = atof(aps[4+a.toInt()]); }
198
+ else if (5==numFields) ptr->setProb(aps[3]) = atof(aps[4]);
199
+ //// else if (5==numFields && 0==strcmp(aps[3],"0")) ptr->setProb() = 1.0 - atof(aps[4]);
200
+ //// else if (5==numFields && 0==strcmp(aps[3],"1")) ptr->setProb() = atof(aps[4]);
201
+ else assert(false);
202
+
203
+ } else return false;
204
+ return true;
205
+ }
206
+
207
+
208
+ ////////////////////////////////////////////////////////////////////////////////
209
+ //
210
+ // Trainable ContDTree Model
211
+ //
212
+ ////////////////////////////////////////////////////////////////////////////////
213
+
214
+ template<class Y, class X, class P>
215
+ class TrainableContDTree2DModel : public ContDTree2DModel<Y,X,P> {
216
+ private:
217
+ List<Joint2DRV<X,Y> > lxy;
218
+ public:
219
+ // Downcasts (safe b/c no new data)...
220
+ TrainableContDTree2DModel<Y,X,P>& setLeft() { return static_cast<TrainableContDTree2DModel<Y,X,P>&>(ContDTree2DModel<Y,X,P>::setLeft()); }
221
+ TrainableContDTree2DModel<Y,X,P>& setRight() { return static_cast<TrainableContDTree2DModel<Y,X,P>&>(ContDTree2DModel<Y,X,P>::setRight()); }
222
+ const TrainableContDTree2DModel<Y,X,P>& getLeft() const { return static_cast<const TrainableContDTree2DModel<Y,X,P>&>(ContDTree2DModel<Y,X,P>::getLeft()); }
223
+ const TrainableContDTree2DModel<Y,X,P>& getRight() const { return static_cast<const TrainableContDTree2DModel<Y,X,P>&>(ContDTree2DModel<Y,X,P>::getRight()); }
224
+ // Specification methods...
225
+ void train ( List<Joint2DRV<X,Y> >&, const double ) ;
226
+ void train ( const double d ) { train(lxy,d); }
227
+ ////// Input / output methods...
228
+ bool readData ( char* vs[], int numFields ) {
229
+ if ( 3==numFields ) lxy.add() = Joint2DRV<X,Y> ( X(vs[1]), Y(vs[2]) );
230
+ else return false;
231
+ return true;
232
+ }
233
+ };
234
+
235
+ ////////////////////
236
+ template<class Y, class X, class P>
237
+ void TrainableContDTree2DModel<Y,X,P>::train ( List<Joint2DRV<X,Y> >& lxy, const double DTREE_CHISQR_LIMIT ) {
238
+
239
+ // Place to store counts...
240
+ //CPT3DModel<A,B,Y,double> aaaCounts; // hash was MUCH slower!!
241
+ SafeArray2D<B,Y,double> aaCounts ( 2, Y::getDomain().getSize(), 0.0 );
242
+ double dTot = lxy.getCard();
243
+ CPT1DModel<Y,double> modelY;
244
+
245
+ // if (11613==dTot) { //if (12940<=dTot && dTot<12950) { //if ( 20779==dTot ) { //// (bU)
246
+ // ListedObject<Joint2DRV<X,Y> >* pxy;
247
+ // for ( pxy = lxy.getFirst(); pxy; pxy = lxy.getNext(pxy) ) {
248
+ // fprintf(stdout,"Ohist "); pxy->getSub1().write(stdout); fprintf(stdout," "); pxy->getSub2().write(stdout); fprintf(stdout,"\n");
249
+ // }
250
+ // fprintf(stderr,"PRINTED\n");
251
+ // }
252
+
253
+ // For each datum in list...
254
+ ListedObject<Joint2DRV<X,Y> >* pxy;
255
+ for ( pxy = lxy.getFirst(); pxy; pxy = lxy.getNext(pxy) ) {
256
+ // Count Ys...
257
+ modelY.setProb(pxy->getSub2())++;
258
+ }
259
+ modelY.normalize();
260
+
261
+ double prRarest = (modelY.getProb("1")<modelY.getProb("0")) ? modelY.getProb("1") : modelY.getProb("0");
262
+
263
+ // // Set separator to pass through center of positives...
264
+ // for ( pxy = lxy.getFirst(); pxy; pxy = lxy.getNext(pxy) )
265
+ // for ( A a; a<X::getSize(); a.setNext() ) {
266
+ // if ( Y("1")==pxy->getSub2() ) {
267
+ // Tree<ContDecisNode<Y,P> >::setWt() -= (pxy->getSub1().get(a.toInt())+pow(pxy->getSub1().get(a.toInt()),2.0))/dTot; //// (dTot*prRarest);
268
+ // Tree<ContDecisNode<Y,P> >::setWt(a) += pxy->getSub1().get(a.toInt())/dTot; //// (dTot*prRarest);
269
+ // Tree<ContDecisNode<Y,P> >::setSsWt() += pow(pxy->getSub1().get(a.toInt()),2.0)/dTot; //// (dTot*prRarest);
270
+ // }
271
+ // }
272
+
273
+ // Set separator to pass through center of positives...
274
+ Tree<ContDecisNode<Y,P> >::setWt() = 1.0;
275
+
276
+
277
+ // For each gradient descent epoch...
278
+ for ( int epoch=1; epoch<=1000; epoch++ ) {
279
+
280
+ double dCtr=0.0;
281
+
282
+ double dPos = 0.0;
283
+ ListedObject<Joint2DRV<X,Y> >* pxy;
284
+
285
+ if(OUTPUT_NOISY) {
286
+ double lgprTot = 0.0;
287
+ // // For each datum in list...
288
+ // for ( pxy = lxy.getFirst(); pxy; pxy = lxy.getNext(pxy) ) {
289
+ // // Calc tot prob...
290
+ // double wtdavg = -Tree<ContDecisNode<Y,P> >::getWt();
291
+ // for(A a;a<X::getSize();a.setNext()) wtdavg += Tree<ContDecisNode<Y,P> >::getWt(a) * pxy->getSub1().get(a.toInt());
292
+ // // Calc est val of Y using sigmoid transfer fn...
293
+ // P prY = 1.0 / ( 1.0 + exp(-wtdavg) );
294
+ // if(epoch>1)fprintf(stderr," %f %f\n",(double)wtdavg,(double)prY);
295
+ // lgprTot += (pxy->getSub2()==1) ? log(prY) : log(1.0-prY) ;
296
+ // }
297
+
298
+ if (OUTPUT_NOISY && epoch%10==0) {
299
+ // Report...
300
+ fprintf(stderr," tot=%08d totlogprob=%g separator=%f",(int)dTot,lgprTot,Tree<ContDecisNode<Y,P> >::getWt());
301
+ for(A a;a<X::getSize();a.setNext()) fprintf(stderr,"_%f",Tree<ContDecisNode<Y,P> >::getWt(a));
302
+ fprintf(stderr,"_%f",Tree<ContDecisNode<Y,P> >::getSsWt());
303
+ fprintf(stderr,"\n");
304
+ }
305
+ }
306
+
307
+ fprintf(stderr," --- epoch %d ---\n",epoch);
308
+
309
+ // For each datum in list...
310
+ for ( pxy = lxy.getFirst(); pxy; pxy = lxy.getNext(pxy) ) {
311
+ // // Use random subset of more frequent Y val so total wts for 1 and 0 are equal (CODE REVIEW: should be subset nearest to centroid of fewer)...
312
+ // if ( double(rand())/double(RAND_MAX) < prRarest/modelY.getProb(pxy->getSub2()) ) {
313
+
314
+ dCtr++;
315
+ double gamma = dTot/(dTot+dCtr); // 1.0/(double(epoch)+dCtr/dTot); // 1.0/double(epoch); // 1.0/(double(epoch)+dCtr/(dTot*prRarest*2.0)); //
316
+
317
+ // Weight deltas for next epoch...
318
+ Wt wDelta = 0.0;
319
+ SafeArray1D<A,Wt> awDeltas (X::getSize(),0.0);
320
+ Wt wSsDelta = 0.0;
321
+
322
+ // Calc sum of squares for convolution coordinate...
323
+ double sumsqr=0.0;
324
+ for(A a;a<X::getSize();a.setNext()) sumsqr += pow(pxy->getSub1().get(a.toInt()),2.0) / X::getSize();
325
+
326
+ // Calc wtd avg of feats...
327
+ double wtdavg = -Tree<ContDecisNode<Y,P> >::getWt();
328
+ for(A a;a<X::getSize();a.setNext()) wtdavg += Tree<ContDecisNode<Y,P> >::getWt(a) * pxy->getSub1().get(a.toInt());
329
+ wtdavg += Tree<ContDecisNode<Y,P> >::getSsWt() * sumsqr;
330
+ //// Calc est val of Y using sigmoid transfer fn...
331
+ //P prY = ( ( ( (1.0/(1.0+exp(-wtdavg))) - .5 ) * exp(-wtdavg) ) + .5 ) ;
332
+ // Calc est val of Y using sigmoid transfer fn...
333
+ P prY = 1.0 / ( 1.0 + exp(-wtdavg) );
334
+
335
+ // Calc deltas for each feature/attribute/dimension...
336
+ double dEachWt = 1.0/dTot; // 1.0/dTot * modelY.getProb ( Y(1-pxy->getSub2().toInt()) ); // 1.0/(dTot*prRarest*2.0); //
337
+ wDelta += dEachWt * -1 * ( prY - P(double(pxy->getSub2().toInt())) );
338
+ for ( A a; a<X::getSize(); a.setNext() )
339
+ awDeltas.set(a) += dEachWt * pxy->getSub1().get(a.toInt()) * ( prY - P(double(pxy->getSub2().toInt())) );
340
+ wSsDelta += dEachWt * sumsqr * ( prY - P(double(pxy->getSub2().toInt())) );
341
+
342
+ // Update weights by deltas...
343
+ //Tree<ContDecisNode<Y,P> >::setWt() -= gamma * wDelta;
344
+ ////double reldeduction = wDelta/Tree<ContDecisNode<Y,P> >::getWt();
345
+ for ( A a; a<X::getSize(); a.setNext() )
346
+ Tree<ContDecisNode<Y,P> >::setWt(a) -= gamma*awDeltas.get(a); //+ changeratio/Tree<ContDecisNode<Y,P> >::getWt(a);
347
+ Tree<ContDecisNode<Y,P> >::setSsWt() -= gamma*wSsDelta; //+ changeratio/Tree<ContDecisNode<Y,P> >::getSsWt();
348
+
349
+ dPos+=prY; // if (prY>0.5) dPos++;
350
+
351
+ // Report...
352
+ if(OUTPUT_VERYNOISY) {
353
+ fprintf(stderr," A tot=%08d vals = %f",(int)dTot,-1.00);
354
+ for(A a;a<X::getSize();a.setNext()) fprintf(stderr,"_%f",pxy->getSub1().get(a.toInt()));
355
+ fprintf(stderr,"_%f",sumsqr);
356
+ fprintf(stderr," --> %f %f (gold: %f)\n",wtdavg,(double)prY,double(pxy->getSub2().toInt()));
357
+ fprintf(stderr," D tot=%08d delt = %f",(int)dTot,wDelta);
358
+ for(A a;a<X::getSize();a.setNext()) fprintf(stderr,"_%f",awDeltas.get(a));
359
+ fprintf(stderr,"_%f",wSsDelta);
360
+ fprintf(stderr,"\n");
361
+ }
362
+
363
+ // Report...
364
+ if(OUTPUT_VERYNOISY) {
365
+ fprintf(stderr," _S tot=%08d sepr = %f",(int)dTot,Tree<ContDecisNode<Y,P> >::getWt());
366
+ for(A a;a<X::getSize();a.setNext()) fprintf(stderr,"_%f",Tree<ContDecisNode<Y,P> >::getWt(a));
367
+ fprintf(stderr,"_%f",Tree<ContDecisNode<Y,P> >::getSsWt());
368
+ fprintf(stderr,"\n");
369
+ }
370
+ // }
371
+ } // end loop pxy
372
+
373
+ /* // Report... */
374
+ /* if(OUTPUT_NOISY) { */
375
+ /* fprintf(stderr," tot:%08d +:%08d -:%08d\n",(int)dTot,(int)dPos,(int)(dTot-dPos)); */
376
+ /* fprintf(stderr," E tot=%08d separator=%f",(int)dTot,wDelta); */
377
+ /* for(A a;a<X::getSize();a.setNext()) fprintf(stderr,"_%f",awDeltas.get(a)); */
378
+ /* fprintf(stderr,"\n"); */
379
+ /* } */
380
+ } // end loop epoch
381
+
382
+ // Split list into each 0/1 child of this node...
383
+ List<Joint2DRV<X,Y> > alxy[2];
384
+ int actr[2] = {0,0};
385
+ // For each datum in list...
386
+ while ( !lxy.isEmpty() ) {
387
+ Joint2DRV<X,Y>* pxy = lxy.getFirst();
388
+ double sumsqr=0.0;
389
+ for(A a;a<X::getSize();a.setNext()) sumsqr += pow(pxy->getSub1().get(a.toInt()),2.0) / X::getSize();
390
+ Wt wtdavg=-Tree<ContDecisNode<Y,P> >::getWt();
391
+ for(A a;a<X::getSize();a.setNext()) wtdavg += Tree<ContDecisNode<Y,P> >::getWt(a) * pxy->getSub1().get(a.toInt());
392
+ wtdavg += Tree<ContDecisNode<Y,P> >::getSsWt() * sumsqr;
393
+ alxy[(wtdavg>0.0)?1:0].add() = *pxy;
394
+ aaCounts.set((wtdavg>0.0)?1:0,pxy->getSub2())++;
395
+ actr[(wtdavg>0.0)?1:0]++;
396
+ if(OUTPUT_VERYNOISY){fprintf(stderr,"classify "); pxy->write(stderr); fprintf(stderr," wtdavg=%f class=%d\n",wtdavg,(wtdavg>0.0)?1:0);}
397
+ lxy.pop();
398
+ }
399
+
400
+ // Calc chisqr...
401
+ double chisqr = 0.0;
402
+ fprintf(stderr," tot=%08d split=",(int)dTot);
403
+ for ( int b=0; b<2; b++ ) {
404
+ Y y;
405
+ for ( bool by=y.setFirst(); by; by=y.setNext() ) {
406
+ fprintf(stderr," (%s->%d:%f)",y.getString().c_str(),b,aaCounts.get(b,y));
407
+ if ( actr[b]>0.0 && modelY.getProb(y)>0.0 && dTot>0.0 ) {
408
+ double expect = actr[b] * modelY.getProb(y);
409
+ chisqr += pow ( aaCounts.get(b,y)-expect, 2.0 ) / expect;
410
+ }
411
+ }
412
+ }
413
+ fprintf(stderr,"\n");
414
+
415
+ // Report...
416
+ if(OUTPUT_NOISY) {
417
+ fprintf(stderr," tot=%08d separator=%f",(int)dTot,Tree<ContDecisNode<Y,P> >::getWt());
418
+ for(A a;a<X::getSize();a.setNext()) fprintf(stderr,"_%f",Tree<ContDecisNode<Y,P> >::getWt(a));
419
+ fprintf(stderr,"_%f",Tree<ContDecisNode<Y,P> >::getSsWt());
420
+ fprintf(stderr," chisqr=%g\n",chisqr);
421
+ }
422
+
423
+ // If separation is significant to chisqr limit...
424
+ if ( chisqr > DTREE_CHISQR_LIMIT ) {
425
+ // Recursively call train at each child...
426
+ setRight().train ( alxy[1], DTREE_CHISQR_LIMIT ); ////node*2LL+1LL);
427
+ setLeft().train ( alxy[0], DTREE_CHISQR_LIMIT ); ////node*2LL);
428
+ }
429
+ // If separation is not significant...
430
+ else {
431
+ // Add ratio as leaf...
432
+ Y y;
433
+ for ( bool by=y.setFirst(); by; by=y.setNext() )
434
+ ContDecisNode<Y,P>::setProb(y) = (dTot>0.0) ? modelY.getProb(y) : 1.0/Y::getDomain().getSize();
435
+ }
436
+ }
437
+
438
+
439
+ ////////////////////////////////////////////////////////////////////////////////
440
+
441
+ template<class Y, class X1, class X2, class P>
442
+ class TrainableContDTree3DModel : public ContDTree3DModel<Y,X1,X2,P> {
443
+
444
+ private:
445
+
446
+ map<X1,List<Joint2DRV<X2,Y> > > mqlxy;
447
+
448
+ public:
449
+
450
+ ////// Constructor...
451
+ TrainableContDTree3DModel() { }
452
+ TrainableContDTree3DModel(const char* ps) : ContDTree3DModel<Y,X1,X2,P>(ps) { }
453
+
454
+ ////// setTree downcast...
455
+ TrainableContDTree2DModel<Y,X2,P>& setTree(const X1& x1) { return static_cast<TrainableContDTree2DModel<Y,X2,P>&>(ContDTree3DModel<Y,X1,X2,P>::setTree(x1)); }
456
+
457
+ ////// Add training data to per-subphone lists...
458
+ bool readData ( char* vs[], int numFields ) {
459
+ if ( 4==numFields ) {
460
+ mqlxy[X1(vs[1])].add() = Joint2DRV<X2,Y> ( X2(vs[2]), Y(vs[3]) );
461
+ ////mqlxy[X1(vs[1])].getLast()->write(stderr); fprintf(stderr,"\n");
462
+ }
463
+ else return false;
464
+ return true;
465
+ }
466
+
467
+ ////// Train each subphone...
468
+ void train ( const double DTREE_CHISQR_LIMIT ) {
469
+ int ctr = 0;
470
+ // For each subphone...
471
+ X1 x1; for ( bool b=x1.setFirst(); b; b=x1.setNext() ) {
472
+ if(OUTPUT_NOISY)
473
+ fprintf(stderr,"***** x1:%s (number %d) *****\n",x1.getString().c_str(),ctr++);
474
+ setTree(x1).train ( mqlxy[x1], DTREE_CHISQR_LIMIT );
475
+ }
476
+ }
477
+ };
478
+
479
+ #endif // _NL_DTREE_CONTIN__
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-fixedmatrix.h ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ///////////////////////////////////////////////////////////////////////////////
2
+ // //
3
+ // This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
4
+ // //
5
+ // ModelBlocks is free software: you can redistribute it and/or modify //
6
+ // it under the terms of the GNU General Public License as published by //
7
+ // the Free Software Foundation, either version 3 of the License, or //
8
+ // (at your option) any later version. //
9
+ // //
10
+ // ModelBlocks is distributed in the hope that it will be useful, //
11
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of //
12
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
13
+ // GNU General Public License for more details. //
14
+ // //
15
+ // You should have received a copy of the GNU General Public License //
16
+ // along with ModelBlocks. If not, see <http://www.gnu.org/licenses/>. //
17
+ // //
18
+ // ModelBlocks developers designate this particular file as subject to //
19
+ // the "Moses" exception as provided by ModelBlocks developers in //
20
+ // the LICENSE file that accompanies this code. //
21
+ // //
22
+ ///////////////////////////////////////////////////////////////////////////////
23
+
24
+
25
+ //basically a SafeArray2D with operators defined
26
+ template<class T>
27
+ class Matrix : public SafeArray2D<Id<int>,Id<int>,T> {
28
+ //public:
29
+ //int xSize;
30
+ //int ySize;
31
+ public:
32
+ // Constructor / destructor methods...
33
+ //~Matrix( ) { delete[] at; }
34
+ Matrix ( ) : SafeArray2D<Id<int>,Id<int>,T>( ) { }//{ xSize=0; ySize=0; }
35
+ Matrix (int x, int y) : SafeArray2D<Id<int>,Id<int>,T>(x,y) { }//{ xSize=x; ySize=y; }
36
+ Matrix (int x, int y, const T& t) : SafeArray2D<Id<int>,Id<int>,T>(x,y,t) { }//{ xSize=x; ySize=y; }
37
+ Matrix (const Matrix& a) : SafeArray2D<Id<int>,Id<int>,T>(a.xSize(),a.ySize()) { //xSize=a.xSize; ySize=a.ySize;
38
+ for(int i=0;i<xSize();i++) for(int j=0;j<ySize();j++) this->set(i,j)=a.get(i,j); }
39
+ // Specification methods...
40
+ //Matrix& operator= ( const Matrix<T>& sat )
41
+ // { xSize=sat.xSize; ySize=sat.ySize; //at=new T[xSize*ySize];
42
+ // for(int i=0;i<xSize;i++) for(int j=0;j<ySize;j++) set(i,j)=sat.at[i]; return *this; }
43
+ void init ( int x,int y ) { (*this)=Matrix<T>(x,y,T()); }//xSize=x; ySize=y; }
44
+ void init ( int x,int y,const T& t ) { (*this)=Matrix<T>(x,y,t); }//xSize=x; ySize=y; }
45
+ void reset() { (*this)=Matrix<T>( ); }//xSize=0; ySize=0; }
46
+
47
+ // Inherited methods
48
+ //T& set ( const X1& x,const X2& y);
49
+ //const T& get (const X1& x,const X2& y) const;
50
+
51
+ int xSize( ) const { return this->getxSize(); }
52
+ int ySize( ) const { return this->getySize(); }
53
+
54
+ // Math...
55
+ friend Matrix<T> operator* ( const Matrix<T>& a, const Matrix<T>& b ) {
56
+ if (a.ySize()!=b.xSize()) {
57
+ cerr<<"ERROR: matrix multiplication requires matching inner indices; "<<a.xSize()<<"x"<<a.ySize()<<" "<<b.xSize()<<"x"<<b.ySize()<<endl;
58
+ #ifndef NDEBUG
59
+ cerr<<" a= "<<a<<"\n\n b= "<<b<<endl;
60
+ #endif
61
+ return Matrix<T>();
62
+ }
63
+ Matrix mOut(a.xSize(),b.ySize(),T());
64
+ for (int i=0; i<a.xSize(); i++ ){
65
+ for (int k=0; k<a.ySize(); k++ ) {
66
+ for (int j=0; j<b.ySize(); j++ ) {
67
+ mOut.set(i,j) += a.get(Id<int>(i),Id<int>(k))*b.get(Id<int>(k),Id<int>(j));
68
+ }
69
+ }
70
+ }
71
+ //cerr<<" a= "<<a<<"\n b= "<<b<<"\n c= "<<mOut<<endl<<endl;
72
+ return mOut;
73
+ }
74
+ friend Matrix<T> operator& ( const Matrix<T>& a, const Matrix<T>& b ) {
75
+ if (a.xSize()!=b.xSize() || a.ySize()!=b.ySize()) {
76
+ cerr<<"ERROR: pt-by-pt multiplication requires matching indices; "<<a.xSize()<<"x"<<a.ySize()<<" "<<b.xSize()<<"x"<<b.ySize()<<endl;
77
+ #ifndef NDEBUG
78
+ cerr<<" a= "<<a<<"\n\n b= "<<b<<endl;
79
+ #endif
80
+ return Matrix<T>();
81
+ }
82
+ Matrix mOut(a.xSize(),a.ySize(),T());
83
+ for (int i=0; i<a.xSize(); i++ ){
84
+ for (int j=0; j<b.ySize(); j++ ) {
85
+ mOut.set(i,j) += a.get(Id<int>(i),Id<int>(j))*b.get(Id<int>(i),Id<int>(j));
86
+ }
87
+ }
88
+ //cerr<<" a= "<<a<<"\n b= "<<b<<"\n c= "<<mOut<<endl<<endl;
89
+ return mOut;
90
+ }
91
+ friend Matrix<T> operator+ ( const Matrix<T>& a, const Matrix<T>& b ) {
92
+ if (a.xSize()!=b.xSize() || a.ySize()!=b.ySize()) {
93
+ cerr<<"ERROR: matrix addition requires matching dimensions"<<endl;
94
+ return Matrix<T>();
95
+ }
96
+ Matrix mOut(a.xSize(),b.ySize(),T());
97
+ for (int i=0; i<a.xSize(); i++ ){
98
+ for (int j=0; j<a.ySize(); j++ ) {
99
+ mOut.set(i,j) = a.get(Id<int>(i),Id<int>(j))+b.get(Id<int>(i),Id<int>(j));
100
+ }
101
+ }
102
+ return mOut;
103
+ }
104
+ friend Matrix<T> operator- ( const Matrix<T>& a, const Matrix<T>& b ) {
105
+ if (a.xSize()!=b.xSize() || a.ySize()!=b.ySize()) {
106
+ cerr<<"ERROR: matrix subtraction requires matching dimensions"<<endl;
107
+ //cerr<<"aSize="<<a.xSize<<","<<a.ySize()<<" bSize="<<b.xSize<<","<<b.ySize()<<endl;
108
+ //cerr<<" a= "<<a<<"\n b= "<<b<<"\n c= "<<mOut<<endl<<endl;
109
+ return Matrix<T>();
110
+ }
111
+ Matrix mOut(a.xSize(),b.ySize(),T());
112
+ for (int i=0; i<a.xSize(); i++ ){
113
+ for (int j=0; j<a.ySize(); j++ ) {
114
+ mOut.set(i,j) = a.get(Id<int>(i),Id<int>(j))-b.get(Id<int>(i),Id<int>(j));
115
+ }
116
+ }
117
+ return mOut;
118
+ }
119
+ friend Matrix<T> operator* ( const Matrix<T>& a, const T& t ) {
120
+ Matrix mOut(a.xSize(),a.ySize());
121
+ for (int i=0; i<a.xSize(); i++ ){
122
+ for (int j=0; j<a.ySize(); j++ ) {
123
+ mOut.set(i,j) = a.get(Id<int>(i),Id<int>(j))*t;
124
+ }
125
+ }
126
+ return mOut;
127
+ }
128
+ friend Matrix<T> operator+ ( const Matrix<T>& a, const T& t ) {
129
+ Matrix mOut(a.xSize(),a.ySize());
130
+ for (int i=0; i<a.xSize(); i++ ){
131
+ for (int j=0; j<a.ySize(); j++ ) {
132
+ mOut.set(i,j) = a.get(Id<int>(i),Id<int>(j))+t;
133
+ }
134
+ }
135
+ return mOut;
136
+ }
137
+ friend Matrix<T> operator- ( const Matrix<T>& a, const T& t ) {
138
+ Matrix mOut(a.xSize(),a.ySize());
139
+ for (int i=0; i<a.xSize(); i++ ){
140
+ for (int j=0; j<a.ySize(); j++ ) {
141
+ mOut.set(i,j) = a.get(Id<int>(i),Id<int>(j))-t;
142
+ }
143
+ }
144
+ return mOut;
145
+ }
146
+
147
+ // Scalar inf-norm (max) of matrix / vector...
148
+ T infnorm ( ) const {
149
+ T tOut = T();
150
+ for (int i=0; i<xSize(); i++ ){
151
+ for (int j=0; j<ySize(); j++ ) {
152
+ if ( this->get(Id<int>(i),Id<int>(j))>tOut ) tOut = this->get(Id<int>(i),Id<int>(j));
153
+ }
154
+ }
155
+ return tOut;
156
+ }
157
+
158
+ /*
159
+ // Argmax of matrix / vector... //NOT WORKING
160
+ pair<int,int> argmax ( ) const {
161
+ T tOut = T();
162
+ pair<int,int> ij();
163
+ for (int i=0; i<xSize(); i++ ){
164
+ for (int j=0; j<ySize(); j++ ) {
165
+ if ( this->get(Id<int>(i),Id<int>(j))>tOut ) {
166
+ tOut = this->get(Id<int>(i),Id<int>(j));
167
+ ij = make_pair(i,j);
168
+ }
169
+ }
170
+ }
171
+ return ij; //pair<int,int>( ij.getIndex(), ij.getIndex() );
172
+ }
173
+ */
174
+ // Diagonal matrix of vector...
175
+ friend Matrix<T> diag ( const Matrix<T>& a ) {
176
+ Matrix mOut(a.xSize(),a.xSize(),T()); // output is n x n
177
+ for (int i=0;i<a.xSize();i++) {
178
+ for (int j=0;j<a.ySize();j++) {
179
+ assert(j==0); // must be vector, n x 1
180
+ mOut.set(Id<int>(i),Id<int>(i)) += a.get(Id<int>(i),Id<int>(j));
181
+ }
182
+ }
183
+ return mOut;
184
+ }
185
+
186
+ // Ordering method (treat as bit string)...
187
+ bool operator< ( const Matrix<T>& mt ) const {
188
+ if (xSize()<mt.xSize() || ySize()<mt.ySize()) return true;
189
+ if (xSize()>mt.xSize() || ySize()>mt.ySize()) return false;
190
+ for (int i=0; i<xSize(); i++ ) {
191
+ for (int j=0; j<ySize(); j++ ) {
192
+ if ( this->get(Id<int>(i),Id<int>(j)) < mt.get(Id<int>(i),Id<int>(j)) ) return true;
193
+ else if ( this->get(Id<int>(i),Id<int>(j)) > mt.get(Id<int>(i),Id<int>(j)) ) return false;
194
+ }
195
+ }
196
+ return false;
197
+ }
198
+ bool operator== ( const Matrix<T>& a ) const {
199
+ if (xSize()!=a.xSize() || ySize()!=a.ySize()) return false;
200
+ for (int i=0;i<a.xSize();i++)
201
+ for (int j=0;j<a.ySize();j++)
202
+ if (this->get(Id<int>(i),Id<int>(j))!=a.get(Id<int>(i),Id<int>(j))) return false;
203
+ return true;
204
+ }
205
+
206
+ // Input/output methods...
207
+ friend ostream& operator<< ( ostream& os, const Matrix<T>& a ) {
208
+ os<<"\n ";
209
+ for (int i=0;i<a.xSize();i++) {
210
+ for (int j=0;j<a.ySize();j++) {
211
+ os<<((j==0)?"":",")<<a.get(Id<int>(i),Id<int>(j));
212
+ }
213
+ os<<(i==a.xSize()-1?"\n":"\n ");
214
+ }
215
+ return os;
216
+ }
217
+ friend String& operator<< ( String& str, const Matrix<T>& a ) {
218
+ str<<"\n ";
219
+ for (int i=0;i<a.xSize();i++) {
220
+ for (int j=0;j<a.ySize();j++) {
221
+ str<<((j==0)?"":",")<<a.get(Id<int>(i),Id<int>(j));
222
+ }
223
+ str<<";";
224
+ }
225
+ return str;
226
+ }
227
+ string getString( ) const;
228
+
229
+ };
230
+ template <class T>
231
+ string Matrix<T>::getString() const {
232
+ string str;
233
+ for (int i=0;i<xSize();i++) {
234
+ for (int j=0;j<ySize();j++) {
235
+ str += ((j==0)?"":",");
236
+ str += this->get(Id<int>(i),Id<int>(j));
237
+ }
238
+ str += ";";
239
+ }
240
+ return str;
241
+ }
242
+
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-gauss.h ADDED
@@ -0,0 +1,287 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ///////////////////////////////////////////////////////////////////////////////
2
+ // //
3
+ // This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
4
+ // //
5
+ // ModelBlocks is free software: you can redistribute it and/or modify //
6
+ // it under the terms of the GNU General Public License as published by //
7
+ // the Free Software Foundation, either version 3 of the License, or //
8
+ // (at your option) any later version. //
9
+ // //
10
+ // ModelBlocks is distributed in the hope that it will be useful, //
11
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of //
12
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
13
+ // GNU General Public License for more details. //
14
+ // //
15
+ // You should have received a copy of the GNU General Public License //
16
+ // along with ModelBlocks. If not, see <http://www.gnu.org/licenses/>. //
17
+ // //
18
+ // ModelBlocks developers designate this particular file as subject to //
19
+ // the "Moses" exception as provided by ModelBlocks developers in //
20
+ // the LICENSE file that accompanies this code. //
21
+ // //
22
+ ///////////////////////////////////////////////////////////////////////////////
23
+
24
+ #ifndef _NL_GAUSS__
25
+ #define _NL_GAUSS__
26
+
27
+ #include <vector>
28
+ #include <string>
29
+ #include <math.h>
30
+ #include "nl-cpt.h"
31
+ #include "nl-prob.h"
32
+ #include "nl-list.h"
33
+
34
+ using namespace std;
35
+
36
+ static const PDFVal MEAN_THRESHOLD = 0.01; //0.0001; //0.001
37
+ static const PDFVal VARIANCE_THRESHOLD = 0.01; //0.0001; //0
38
+
39
+
40
+ ////////////////////////////////////////////////////////////////////////////////
41
+ //
42
+ // Diagonal Multivariate Gaussian Model
43
+ //
44
+ ////////////////////////////////////////////////////////////////////////////////
45
+
46
+ template <class Y>
47
+ class DiagGauss1DModel : public Generic1DModel<Y,PDFVal> {
48
+ private:
49
+ // Member variables...
50
+ string sId;
51
+ bool bModeled;
52
+ PDFVal prInvPowSqrt2PI;
53
+ SimpleHash<Id<int>,PDFVal> aMeans;
54
+ SimpleHash<Id<int>,PDFVal> aVariances;
55
+ PDFVal prInvRootNormVariances;
56
+ PDFVal prProduct;
57
+ SimpleHash<Id<int>,PDFVal> algprNegHalfInvVariances;
58
+ public:
59
+ // Constructor / destructor methods...
60
+ DiagGauss1DModel ( ) : bModeled(false) { }
61
+ DiagGauss1DModel ( const string& s ) : sId(s), bModeled(false) { }
62
+ // Specification methods...
63
+ void precomputeVarianceTerms ( ) ;
64
+ PDFVal& setMean ( int i ) { return aMeans.set(i); }
65
+ PDFVal& setVariance ( int i ) { return aVariances.set(i); }
66
+ PDFVal& setInvRootNormVar ( ) { return prInvRootNormVariances; }
67
+ PDFVal& setNegHalfInvVar ( int i ) { return algprNegHalfInvVariances.set(i); }
68
+ // Extraction methods...
69
+ PDFVal getMean ( int i ) const { return aMeans.get(i); }
70
+ PDFVal getVariance ( int i ) const { return aVariances.get(i); }
71
+ PDFVal getInvRootNormVar ( ) const { return prInvRootNormVariances; }
72
+ PDFVal getNegHalfInvVar ( int i ) const { return algprNegHalfInvVariances.get(i); }
73
+ int getNumFeats ( ) const { return Y::getSize(); }
74
+ PDFVal getProb ( const Y& ) const ;
75
+ // Input / output methods...
76
+ bool readFields ( char*[], int ) ;
77
+ void writeFields ( FILE*, const string& ) const ;
78
+ };
79
+
80
+ ////////////////////////////////////////
81
+ template <class Y>
82
+ inline void DiagGauss1DModel<Y>::precomputeVarianceTerms ( ) {
83
+ // Inverse square root of norm of variances...
84
+ setInvRootNormVar() = 1.0;
85
+ for ( int i=0; i<getNumFeats(); i++ ) setInvRootNormVar() *= 1.0/sqrt(getVariance(i));
86
+ // Negative half of inverse of variances...
87
+ for ( int i=0; i<getNumFeats(); i++ ) setNegHalfInvVar(i) = -1.0/(2.0*getVariance(i));
88
+ // Derived from variance terms...
89
+ prInvPowSqrt2PI = 1.0/pow(sqrt(2.0*M_PI),getNumFeats());
90
+ prProduct = prInvPowSqrt2PI * getInvRootNormVar();
91
+ bModeled = true;
92
+ }
93
+
94
+ ////////////////////////////////////////
95
+ template <class Y>
96
+ inline PDFVal DiagGauss1DModel<Y>::getProb ( const Y& y ) const {
97
+ // fprintf(stderr,"--------------------\n");
98
+ // y.write(stderr);
99
+ // fprintf(stderr,"\n----------\n");
100
+ // writeFields(stderr,"");
101
+ assert(bModeled);
102
+ PDFVal logprob = 0.0;
103
+ for ( int i=0; i<getNumFeats(); i++ )
104
+ logprob += getNegHalfInvVar(i) * pow(y.get(i)-getMean(i),2.0);
105
+ // for ( int i=0; i<getNumFeats(); i++ )
106
+ // fprintf(stderr,"%d %g\n", i, getNegHalfInvVar(i) * pow(y.get(i)-getMean(i),2.0));
107
+ // fprintf(stderr,"----------> %g\n",prProduct * exp(logprob));
108
+ return ( prProduct * exp(logprob) ) ;
109
+ }
110
+
111
+ ////////////////////////////////////////
112
+ template <class Y>
113
+ bool DiagGauss1DModel<Y>::readFields ( char* as[], int numFields ) {
114
+ if ( 0==strcmp(as[1],"m") && numFields>2 ) {
115
+ char* psT;
116
+ for(int i=0;i<getNumFeats();i++)
117
+ setMean(i)=atof(strtok_r((0==i)?as[2]:NULL,"_",&psT));
118
+ }
119
+ else if ( 0==strcmp(as[1],"v") && numFields>2 ) {
120
+ char* psT;
121
+ for(int i=0;i<getNumFeats();i++)
122
+ setVariance(i)=atof(strtok_r((0==i)?as[2]:NULL,"_",&psT));
123
+ }
124
+ else return false;
125
+ return true;
126
+ }
127
+
128
+ ////////////////////////////////////////
129
+ template <class Y>
130
+ void DiagGauss1DModel<Y>::writeFields ( FILE* pf, const string& sPref ) const {
131
+ fprintf(pf,"%s m = ",sPref.c_str());
132
+ for(int i=0; i<getNumFeats(); i++) fprintf(pf,"%s%f",(0==i)?"":"_",getMean(i));
133
+ fprintf ( pf, "\n" ) ;
134
+
135
+ fprintf(pf,"%s v = ",sPref.c_str());
136
+ for(int i=0; i<getNumFeats(); i++) fprintf(pf,"%s%f",(0==i)?"":"_",getVariance(i));
137
+ fprintf ( pf, "\n" ) ;
138
+ }
139
+
140
+
141
+ ////////////////////////////////////////////////////////////////////////////////
142
+
143
+ /*
144
+ template <class Y,class X>
145
+ class DiagGauss2DModel : public Generic2DModel<Y,X,PDFVal> {
146
+ private:
147
+ // Member variables...
148
+ string sId;
149
+ SimpleHash<X,DiagGauss1DModel<Y> > mMY_giv_X;
150
+ public:
151
+ // Constructor / destructor methods...
152
+ DiagGauss2DModel ( const string& s ) : sId(s) { }
153
+ // Extraction methods...
154
+ Prob getProb ( const Y& y, const X& x ) const { return mMY_giv_X.get(x).getProb(y); }
155
+ // Input / output methods...
156
+ bool readFields ( char* as[], int numFields ) {
157
+ ////if ( as[0]!=sId ) return false; // HAVE TO CHECK IN CALLIN FN NOW
158
+ if ( 0==strcmp(as[1],"m") && numFields>3 )
159
+ for ( int i=0; i<numFields-3; i++ ) mMY_giv_X.set(X(as[2])).setMean(i) = atof(as[i+4]) ;
160
+ else if ( 0==strcmp(as[1],"v") && numFields>3 )
161
+ for ( int i=0; i<numFields-3; i++ ) mMY_giv_X.set(X(as[2])).setVariance(i) = atof(as[i+4]) ;
162
+ else return false;
163
+ return true;
164
+ }
165
+ void writeFields ( FILE* pf, const string& sPref ) const {
166
+ X x;
167
+ for(bool b=x.setFirst(); b; b=x.setNext())
168
+ { fprintf(pf,"%s m ",sPref.c_str()); x.write(pf); fprintf(pf," =");
169
+ for(int i=0; i<Y::getSize(); i++) fprintf(pf," %f",mMY_giv_X.getProb(x).getMean(i));
170
+ fprintf ( pf, "\n" ) ; }
171
+ for(bool b=x.setFirst(); b; b=x.setNext())
172
+ { fprintf(pf,"%s v ",sPref.c_str()); x.write(pf); fprintf(pf," =");
173
+ for(int i=0; i<Y::getSize(); i++) fprintf(pf," %f",mMY_giv_X.getProb(x).getVariance(i));
174
+ fprintf ( pf, "\n" ) ; }
175
+ }
176
+ };
177
+
178
+ ////////////////////////////////////////////////////////////////////////////////
179
+
180
+ template <class Y,class X1,class X2>
181
+ class DiagGauss3DModel : public Generic3DModel<Y,X1,X2,PDFVal> {
182
+ private:
183
+ // Member variables...
184
+ string sId;
185
+ SimpleHash<Joint2DRV<X1,X2>,DiagGauss1DModel<Y> > mMY_giv_X1_X2;
186
+ public:
187
+ // Constructor / destructor methods...
188
+ DiagGauss3DModel ( const string& s ) : sId(s) { }
189
+ // Extraction methods...
190
+ Prob getProb ( const Y& y, const X1& x1, const X2& x2 ) const { return mMY_giv_X1_X2.get(x1,x2).getProb(y); }
191
+ // Input / output methods...
192
+ bool readFields ( char* as[], int numFields ) {
193
+ if ( as[0]!=sId ) return false;
194
+ if ( 0==strcmp(as[1],"m") && numFields>4 )
195
+ for ( int i=0; i<numFields-4; i++ ) mMY_giv_X1_X2.set(Joint2DRV<X1,X2>(X1(as[2]),X2(as[2]))).setMean(i) = atof(as[i+4]) ;
196
+ else if ( 0==strcmp(as[1],"v") && numFields>4 )
197
+ for ( int i=0; i<numFields-4; i++ ) mMY_giv_X1_X2.set(Joint2DRV<X1,X2>(X1(as[2]),X2(as[2]))).setVariance(i) = atof(as[i+4]) ;
198
+ else return false;
199
+ return true;
200
+ }
201
+ void writeFields ( FILE* pf, string& sPref ) const {
202
+ X1 x1; X2 x2;
203
+ for(bool b1=x1.setFirst(); b1; b1=x1.setNext()) {
204
+ for(bool b2=x2.setFirst(); b2; b2=x2.setNext())
205
+ { fprintf(pf,"%s m ",sPref.c_str()); x1.write(pf); fprintf(pf," "); x2.write(pf); fprintf(pf," =");
206
+ for(int i=0; i<Y::getSize(); i++) fprintf(pf," %f",mMY_giv_X1_X2.get(Joint2DRV<X1,X2>(x1,x2)).getMean(i));
207
+ fprintf(pf,"\n"); }
208
+ for(bool b2=x2.setFirst(); b2; b2=x2.setNext())
209
+ { fprintf(pf,"%s v ",sPref.c_str()); x1.write(pf); fprintf(pf," "); x2.write(pf); fprintf(pf," =");
210
+ for(int i=0; i<Y::getSize(); i++) fprintf(pf," %f",mMY_giv_X1_X2.get(Joint2DRV<X1,X2>(x1,x2)).getVariance(i));
211
+ fprintf(pf,"\n"); }
212
+ }
213
+ }
214
+ };
215
+ */
216
+
217
+ ////////////////////////////////////////////////////////////////////////////////
218
+ //
219
+ // Trainable Diagonal Multivariate Gaussian Model
220
+ //
221
+ ////////////////////////////////////////////////////////////////////////////////
222
+
223
+ template <class Y>
224
+ class TrainableDiagGauss1DModel : public DiagGauss1DModel<Y> {
225
+ public:
226
+ TrainableDiagGauss1DModel ( ) : DiagGauss1DModel<Y>() { }
227
+ TrainableDiagGauss1DModel ( const string& s ) : DiagGauss1DModel<Y>(s) { }
228
+ // input / output methods...
229
+ void setFields ( const List<pair<const Y*,Prob> >& ) ;
230
+ };
231
+
232
+ ////////////////////////////////////////
233
+ template <class Y>
234
+ void TrainableDiagGauss1DModel<Y>::setFields ( const List<pair<const Y*,Prob> >& lyp ) {
235
+
236
+ // For each dimension...
237
+ for ( int i=0; i<DiagGauss1DModel<Y>::getNumFeats(); i++ ) {
238
+
239
+ // Calc means...
240
+ double curMean = DiagGauss1DModel<Y>::getMean(i);
241
+ DiagGauss1DModel<Y>::setMean(i) = 0.0;
242
+
243
+ // For each Y...
244
+ for ( const ListedObject<pair<const Y*,Prob> >* pyp=lyp.getFirst(); pyp; pyp=lyp.getNext(pyp) ) {
245
+ const Y& y = *pyp->first; // data value
246
+ const Prob& prEmpY = pyp->second; // empirical prob
247
+
248
+ //printf("cal mean i=%d x1=%s x2=%s aaaprYpsi.get(yd,x1,x2)=%f\n", i, x1.getString(), x2.getString(), (double)aaaprYpsi.getProb(yd,x1,x2));
249
+ DiagGauss1DModel<Y>::setMean(i) += prEmpY * y.get(i);
250
+ }
251
+
252
+ // // If any change exceeds thresh, continue...
253
+ // if ( bShouldStop && ( curMean - DiagGauss1DModel<Y>::getMean(i) > MEAN_THRESHOLD ||
254
+ // curMean - DiagGauss1DModel<Y>::getMean(i) < -MEAN_THRESHOLD ) ) bShouldStop = false;
255
+
256
+ //printf("cal mean i=%d getMean(i)=%f\n", i, DiagGauss1DModel<Y>::getMean(i));
257
+
258
+ // Calc variances...
259
+ double curVar = DiagGauss1DModel<Y>::getVariance(i);
260
+ DiagGauss1DModel<Y>::setVariance(i) = 0.0;
261
+
262
+ // For each Y...
263
+ for ( const ListedObject<pair<const Y*,Prob> >* pyp=lyp.getFirst(); pyp; pyp=lyp.getNext(pyp) ) {
264
+ const Y& y = *pyp->first; // data value
265
+ const Prob& prEmpY = pyp->second; // empirical prob
266
+
267
+ //printf("cal var i=%d yd=%s %f %f %f\n", i, yd.getString(), aaaprYpsi.get(yd,x1,x2), getMean(x1,x2,i), yd.get(i));
268
+ DiagGauss1DModel<Y>::setVariance(i) += prEmpY * pow(DiagGauss1DModel<Y>::getMean(i)-y.get(i),2) ;
269
+ }
270
+
271
+ // // If any change exceeds thresh, continue...
272
+ // if ( bShouldStop && ( curVar - DiagGauss1DModel<Y>::getVariance(i) > VARIANCE_THRESHOLD ||
273
+ // curVar - DiagGauss1DModel<Y>::getVariance(i) < -VARIANCE_THRESHOLD ) ) bShouldStop = false;
274
+
275
+ // Avoid div by zero...
276
+ if (DiagGauss1DModel<Y>::getVariance(i) < 1.0) DiagGauss1DModel<Y>::setVariance(i) = 1.0;
277
+
278
+ //printf("cal variance i=%d var=%f\n", i, DiagGauss1DModel<Y>::getVariance(i));
279
+ }
280
+ DiagGauss1DModel<Y>::precomputeVarianceTerms();
281
+ }
282
+
283
+
284
+ #endif /*_NL_GAUSS__*/
285
+
286
+
287
+
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-hash.h ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ///////////////////////////////////////////////////////////////////////////////
2
+ // //
3
+ // This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
4
+ // //
5
+ // ModelBlocks is free software: you can redistribute it and/or modify //
6
+ // it under the terms of the GNU General Public License as published by //
7
+ // the Free Software Foundation, either version 3 of the License, or //
8
+ // (at your option) any later version. //
9
+ // //
10
+ // ModelBlocks is distributed in the hope that it will be useful, //
11
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of //
12
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
13
+ // GNU General Public License for more details. //
14
+ // //
15
+ // You should have received a copy of the GNU General Public License //
16
+ // along with ModelBlocks. If not, see <http://www.gnu.org/licenses/>. //
17
+ // //
18
+ // ModelBlocks developers designate this particular file as subject to //
19
+ // the "Moses" exception as provided by ModelBlocks developers in //
20
+ // the LICENSE file that accompanies this code. //
21
+ // //
22
+ ///////////////////////////////////////////////////////////////////////////////
23
+
24
+ #ifndef __NL_HASH_H_
25
+ #define __NL_HASH_H_
26
+
27
+ #include <cassert>
28
+ //#include <tr1/unordered_map>
29
+ #include <ext/hash_map>
30
+ using namespace __gnu_cxx;
31
+
32
+
33
+ ///////////////////////////////////////////////////////////////////////////////
34
+
35
// Hash functor that defers to the key type's own getHashKey() member.
template<class T>
class SimpleHashFn {
 public:
  size_t operator() ( const T& key ) const {
    return key.getHashKey();
  }
};
40
+
41
+
42
// Equality functor that defers to the key type's operator==.
template<class T>
class SimpleHashEqual {
 public:
  bool operator() ( const T& lhs, const T& rhs ) const {
    return ( lhs == rhs );
  }
};
47
+
48
+
49
// Thin wrapper around hash_map keyed by X (hashed with X::getHashKey(), compared
// with X::operator==).
//   set(x)       returns a writable reference, inserting a default Y if x is absent
//   get(x)       read-only lookup that never inserts; yields a default-constructed
//                Y (the shared yDummy) when x is absent
//   contains(x)  true iff x is present
template<class X, class Y>
class SimpleHash : public hash_map<X,Y,SimpleHashFn<X>,SimpleHashEqual<X> > {
 private:
  typedef hash_map<X,Y,SimpleHashFn<X>,SimpleHashEqual<X> > OrigHash;
  static const Y yDummy;   // value handed out by get() for missing keys

 public:
  // Constructor / destructor methods...
  SimpleHash ( )                    : OrigHash()  { }
  SimpleHash ( int i )              : OrigHash(i) { }
  SimpleHash ( const SimpleHash& s ) : OrigHash(s) { }
  // Specification methods...
  Y& set ( const X& x ) { return OrigHash::operator[](x); }
  // Extraction methods...
  const Y& get ( const X& x ) const {
    // Look the key up once and reuse the iterator (no second find on a hit).
    typename OrigHash::const_iterator it = OrigHash::find(x);
    return ( OrigHash::end()!=it ) ? it->second : yDummy;
  }
  bool contains ( const X& x ) const { return (OrigHash::end()!=OrigHash::find(x)); }
  // Writes the entries as comma-separated key:value pairs, in the map's iteration order.
  friend ostream& operator<< ( ostream& os, const SimpleHash<X,Y>& h ) {
    for ( typename SimpleHash<X,Y>::const_iterator it=h.begin(); it!=h.end(); it++ )
      os<<((it==h.begin())?"":",")<<it->first<<":"<<it->second;
    return os;
  }
};
template<class X, class Y> const Y SimpleHash<X,Y>::yDummy = Y();
80
+ //template<class X, class Y> Y SimpleHash<X,Y>::yNonconstDummy; // = Y();
81
+
82
+ /*
83
+ template<class X, class Y>
84
+ class SimpleMultiHash : public tr1::unordered_multimap<X,Y,SimpleHashFn<X>,SimpleHashEqual<X> > {
85
+ private:
86
+ typedef tr1::unordered_multimap<X,Y,SimpleHashFn<X>,SimpleHashEqual<X> > OrigHash;
87
+ public:
88
+ typedef pair<typename OrigHash::const_iterator,typename OrigHash::const_iterator> const_iterator_pair;
89
+
90
+ // Constructor / destructor methods...
91
+ SimpleMultiHash ( ) : OrigHash() { }
92
+ SimpleMultiHash ( int i ) : OrigHash(i) { }
93
+ // Specification methods...
94
+ Y& add ( const X& x ) { return insert(typename OrigHash::value_type(x,Y()))->second; }
95
+ // Extraction methods...
96
+ bool contains ( const X& x ) const { return (OrigHash::end()!=OrigHash::find(x)); }
97
+ bool contains ( const X& x, const Y& y ) const {
98
+ if (OrigHash::end()==OrigHash::find(x)) return false;
99
+ for ( const_iterator_pair ii=OrigHash::equal_range(x); ii.first!=ii.second; ii.first++ )
100
+ if ( y == ii.first->second ) return true;
101
+ return false;
102
+ }
103
+ };
104
+ */
105
+ #endif // __NL_HASH_H_
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-heap.h ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ///////////////////////////////////////////////////////////////////////////////
2
+ // //
3
+ // This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
4
+ // //
5
+ // ModelBlocks is free software: you can redistribute it and/or modify //
6
+ // it under the terms of the GNU General Public License as published by //
7
+ // the Free Software Foundation, either version 3 of the License, or //
8
+ // (at your option) any later version. //
9
+ // //
10
+ // ModelBlocks is distributed in the hope that it will be useful, //
11
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of //
12
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
13
+ // GNU General Public License for more details. //
14
+ // //
15
+ // You should have received a copy of the GNU General Public License //
16
+ // along with ModelBlocks. If not, see <http://www.gnu.org/licenses/>. //
17
+ // //
18
+ // ModelBlocks developers designate this particular file as subject to //
19
+ // the "Moses" exception as provided by ModelBlocks developers in //
20
+ // the LICENSE file that accompanies this code. //
21
+ // //
22
+ ///////////////////////////////////////////////////////////////////////////////
23
+
24
+ #ifndef _NL_MINHEAP_
25
+ #define _NL_MINHEAP_
26
+
27
+ #include "nl-safeids.h"
28
+
29
+ ////////////////////////////////////////////////////////////////////////////////
30
+
31
+ template <class R, bool outrank(const R&, const R&)>
32
+ class Heap {
33
+ private:
34
+ Array<R> at;
35
+ int iNextToFill;
36
+ //SafeArray1D<Id<int>,R> at;
37
+ // Private specification methods...
38
+ int heapify ( unsigned int ) ;
39
+ public:
40
+ // Constructor / destructor methods...
41
+ Heap<R,outrank> ( ) : at(10), iNextToFill(0) { }
42
+ Heap<R,outrank> ( int i ) : at(i), iNextToFill(0) { }
43
+ Heap<R,outrank> ( int i, const R& r ) : at(i,r), iNextToFill(0) { }
44
+ // Specification methods...
45
+ void init ( int i ) { iNextToFill=0; at.init(i); }
46
+ void clear ( ) { iNextToFill=0; }
47
+ unsigned int fixIncRank ( unsigned int i );
48
+ unsigned int fixDecRank ( unsigned int i );
49
+ R& set ( unsigned int i ) { return at.set(i-1); }
50
+ void enqueue ( const R& r ) { set(iNextToFill+1)=r; fixIncRank(iNextToFill+1); iNextToFill++; }
51
+ R dequeueTop ( ) { R r=get(1); iNextToFill--; set(1)=get(iNextToFill+1); set(iNextToFill+1)=R(); fixDecRank(1); return r; }
52
+ ////R& set ( const Id<int>& i ) { return at.set(i); }
53
+ R& setTop ( ) { return at.set(1-1); }
54
+ // Extraction methods...
55
+ int getSize ( ) const { return iNextToFill; }
56
+ const R& getTop ( ) const { return at.get(1-1); }
57
+ const R& get ( unsigned int i ) const { return at.get(i-1); }
58
+ ////const R& get ( const Id<int>& i ) const { return at.get(i); }
59
+ // Input / output methods...
60
+ friend ostream& operator<< ( ostream& os, const Heap<R,outrank>& h ) { for(int i=0;i<h.iNextToFill;i++) os<<h.at.get(i)<<"\n"; return os; }
61
+ };
62
+
63
+ ////////////////////////////////////////////////////////////////////////////////
64
+
65
+ template <class R, bool outrank(const R&, const R&)>
66
+ int Heap<R,outrank>::heapify ( unsigned int ind ) {
67
+ // Find best of parent, left child, right child...
68
+ unsigned int indBest = ind;
69
+ indBest = (ind*2 <= (unsigned int)iNextToFill &&
70
+ outrank(get(ind*2),get(indBest)))
71
+ ? ind*2 : indBest;
72
+ indBest = (ind*2+1 <= (unsigned int)iNextToFill &&
73
+ outrank(get(ind*2+1),get(indBest)))
74
+ ? ind*2+1 : indBest;
75
+
76
+ // If parent isn't best, restore heap property...
77
+ if ( indBest != ind ) {
78
+ // Swap heap elements...
79
+ R rTemp = get(ind);
80
+ set(ind) = get(indBest);
81
+ set(indBest) = rTemp;
82
+ // Recurse...
83
+ return heapify(indBest);
84
+ }
85
+ else return ind;
86
+ }
87
+
88
+ template <class R, bool outrank(const R&, const R&)>
89
+ unsigned int Heap<R,outrank>::fixIncRank ( unsigned int ind ) { //const R& rec ) {
90
+ // If child outranks parent, restore heap property...
91
+ if ( outrank(get(ind),get((ind==1)?1:ind/2)) ) {
92
+ // Swap heap elements...
93
+ R rTemp = get((ind==1)?1:ind/2);
94
+ set((ind==1)?1:ind/2) = get(ind);
95
+ set(ind) = rTemp;
96
+ // Recurse on parent...
97
+ return fixIncRank(ind/2);
98
+ }
99
+ else return ind;
100
+ }
101
+
102
+ template <class R, bool outrank(const R&, const R&)>
103
+ unsigned int Heap<R,outrank>::fixDecRank ( unsigned int ind ) { //const R& rec ) {
104
+ return heapify(ind);
105
+ }
106
+
107
+
108
+ ////////////////////////////////////////////////////////////////////////////////
109
+
110
+ template <class R>
111
+ class MinHeap {
112
+ private:
113
+ Array<R> at;
114
+ //SafeArray1D<Id<int>,R> at;
115
+ // Private specification methods...
116
+ int minHeapify ( unsigned int ) ;
117
+ public:
118
+ // Constructor / destructor methods...
119
+ MinHeap<R> ( ) : at(10) { }
120
+ MinHeap<R> ( int i ) : at(i) { }
121
+ MinHeap<R> ( int i, const R& r ) : at(i,r) { }
122
+ // Specification methods...
123
+ void init ( int i ) { at.init(i); }
124
+ void clear ( ) { at.clear(); }
125
+ int fixDecr ( int i );
126
+ int fixIncr ( int i );
127
+ R& set ( unsigned int i ) { return at.set(i); }
128
+ ////R& set ( const Id<int>& i ) { return at.set(i); }
129
+ R& setMin ( ) { return at.set(1-1); }
130
+ // Extraction methods...
131
+ int getSize ( ) const { return at.getSize(); }
132
+ const R& getMin ( ) const { return at.get(1-1); }
133
+ const R& get ( unsigned int i ) const { return at.get(i); }
134
+ ////const R& get ( const Id<int>& i ) const { return at.get(i); }
135
+ };
136
+
137
+ ////////////////////////////////////////////////////////////////////////////////
138
+
139
+ template <class R>
140
+ int MinHeap<R>::minHeapify ( unsigned int ind ) {
141
+ // Find min of parent, left child, right child...
142
+ unsigned int indMin = ind ;
143
+ indMin = (ind*2 <= (unsigned int)at.getSize() &&
144
+ at.get(ind*2-1).getScore() < at.get(indMin-1).getScore())
145
+ ? ind*2 : indMin ;
146
+ indMin = (ind*2+1 <= (unsigned int)at.getSize() &&
147
+ at.get(ind*2+1-1).getScore() < at.get(indMin-1).getScore())
148
+ ? ind*2+1 :indMin;
149
+
150
+ // If parent isn't min, restore heap property...
151
+ if ( indMin != ind ) {
152
+ // Swap heap elements...
153
+ R rTemp = at.get(ind-1);
154
+ at.set(ind-1) = at.get(indMin-1);
155
+ at.set(indMin-1) = rTemp;
156
+ // Recurse...
157
+ return minHeapify(indMin);
158
+ }
159
+ else return ind;
160
+ }
161
+
162
+ template <class R>
163
+ int MinHeap<R>::fixDecr ( int i ) { //const R& rec ) {
164
+ // If parent isn't min, restore heap property...
165
+ if ( at.get((i+1)/2).getScore() > at.get(i).getScore() ) {
166
+ // Swap heap elements...
167
+ R rTemp = at.get((i+1)/2);
168
+ at.set((i+1)/2) = at.get(i);
169
+ at.set(i) = rTemp;
170
+ // Recurse on parent...
171
+ return fixDecr((i+1)/2);
172
+ }
173
+ else return i;
174
+ }
175
+
176
+ template <class R>
177
+ int MinHeap<R>::fixIncr ( int i ) { //const R& rec ) {
178
+ return minHeapify(i+1)-1;
179
+ }
180
+
181
+ #endif //_NL_HEAP_
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-hmmloop.h ADDED
@@ -0,0 +1,397 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ///////////////////////////////////////////////////////////////////////////////
2
+ // //
3
+ // This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
4
+ // //
5
+ // ModelBlocks is free software: you can redistribute it and/or modify //
6
+ // it under the terms of the GNU General Public License as published by //
7
+ // the Free Software Foundation, either version 3 of the License, or //
8
+ // (at your option) any later version. //
9
+ // //
10
+ // ModelBlocks is distributed in the hope that it will be useful, //
11
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of //
12
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
13
+ // GNU General Public License for more details. //
14
+ // //
15
+ // You should have received a copy of the GNU General Public License //
16
+ // along with ModelBlocks. If not, see <http://www.gnu.org/licenses/>. //
17
+ // //
18
+ // ModelBlocks developers designate this particular file as subject to //
19
+ // the "Moses" exception as provided by ModelBlocks developers in //
20
+ // the LICENSE file that accompanies this code. //
21
+ // //
22
+ ///////////////////////////////////////////////////////////////////////////////
23
+
24
+ #ifndef _NL_HMMLOOP_
25
+ #define _NL_HMMLOOP_
26
+ #include <list>
27
+ #include <string>
28
+ #include <boost/thread/thread.hpp>
29
+ #include <boost/thread/mutex.hpp>
30
+ #include <boost/bind.hpp>
31
+ #include "nl-prob.h"
32
+ #include "nl-safeids.h"
33
+ #include "nl-beam.h"
34
+
35
+ typedef int Frame;
36
+
37
+ ////////////////////////////////////////////////////////////////////////////////
38
+ ////////////////////////////////////////////////////////////////////////////////
39
+ //
40
+ // NullBackDat - default empty back-pointer data; can replace with word or sem relation
41
+ //
42
+ ////////////////////////////////////////////////////////////////////////////////
43
+
44
// Empty back-pointer payload: carries no information, writes nothing, and
// renders as the empty string.  It can be constructed from a Y, which is ignored.
template <class Y>
class NullBackDat {
  static const string sDummy;                           // the empty string returned by getString()
  char dummy_data_member_to_avoid_compile_warning;      // placeholder member, always set to 0
 public:
  NullBackDat ( ) {
    dummy_data_member_to_avoid_compile_warning = 0;
  }
  NullBackDat ( const Y& ) {
    dummy_data_member_to_avoid_compile_warning = 0;
  }
  // Output methods: all deliberately produce nothing.
  void   write     ( FILE* ) const { }
  string getString ( )       const { return sDummy; }
  friend ostream& operator<< ( ostream& os, const NullBackDat& ) {
    return os;
  }
};
template <class Y>
const string NullBackDat<Y>::sDummy ( "" );
57
+
58
+
59
+ ////////////////////////////////////////////////////////////////////////////////
60
+ ////////////////////////////////////////////////////////////////////////////////
61
+ //
62
+ // Index - pointer to source in previous beam heap
63
+ //
64
+ ////////////////////////////////////////////////////////////////////////////////
65
+
66
+ class Index : public Id<int> {
67
+ public:
68
+ Index ( ) { }
69
+ Index (int i) {set(i);}
70
+ Index& operator++ ( ) {set(toInt()+1); return *this;}
71
+ };
72
+
73
+
74
+ ////////////////////////////////////////////////////////////////////////////////
75
+ ////////////////////////////////////////////////////////////////////////////////
76
+ //
77
+ // TrellNode - node in viterbi trellis
78
+ //
79
+ ////////////////////////////////////////////////////////////////////////////////
80
+
81
+ template <class S, class B>
82
+ class TrellNode {
83
+ private:
84
+
85
+ // Data members...
86
+ Index indSource;
87
+ B backptrData;
88
+ S sId;
89
+ LogProb lgprMax;
90
+
91
+ public:
92
+
93
+ // Constructor / destructor methods...
94
+ TrellNode ( ) { }
95
+ TrellNode ( const Index& indS, const S& sI, const B& bDat, LogProb lgpr)
96
+ { indSource=indS; sId=sI; lgprMax=lgpr; backptrData=bDat; /* fo = -1; */ }
97
+
98
+ // Specification methods...
99
+ const Index& setSource ( ) const { return indSource; }
100
+ const B& setBackData( ) const { return backptrData; }
101
+ const S& setId ( ) const { return sId; }
102
+ LogProb& setScore ( ) { return lgprMax; }
103
+
104
+ // Extraction methods...
105
+ bool operator== ( const TrellNode<S,B>& tnsb ) const { return(sId==tnsb.sId); }
106
+ // size_t getHashKey ( ) const { return sId.getHashKey(); }
107
+ const Index& getSource ( ) const { return indSource; }
108
+ const B& getBackData( ) const { return backptrData; }
109
+ const S& getId ( ) const { return sId; }
110
+ LogProb getLogProb ( ) const { return lgprMax; }
111
+ LogProb getScore ( ) const { return lgprMax; }
112
+
113
+ // Input / output methods...
114
+ friend ostream& operator<< ( ostream& os, const TrellNode& tn ) { return os<<tn.indSource<<","<<tn.backptrData<<","<<tn.sId<<","<<tn.lgprMax; }
115
+ };
116
+
117
+
118
+ ////////////////////////////////////////////////////////////////////////////////
119
+ ////////////////////////////////////////////////////////////////////////////////
120
+ //
121
+ // HMMLoop
122
+ //
123
+ ////////////////////////////////////////////////////////////////////////////////
124
+ // HMMLoop: beam-search recognizer whose trellis is a ring of LOOP_LENGTH frames (indexed by t%LOOP_LENGTH), each holding BEAM_WIDTH TrellNode<S,B> slots.
125
+ template <class MY, class MX, class S=typename MY::RandVarType, class B=NullBackDat<typename MY::RandVarType> >
126
+ class HMMLoop { // MY supplies IterVal and setIterProb(), MX supplies getProb(); both models are read from files by the constructor
127
+ private:
128
+ typedef std::pair<Index,B> IB; // (beam index of the source node, back-pointer data)
129
+ // Data members...
130
+ MY modY; // scored in update() via modY.setIterProb(...)
131
+ MX modX; // scored in update() via modX.getProb(x,...)
132
+ SafeArray2D<Id<Frame>,Id<int>,TrellNode<S,B> > aatnTrellis; // initialised as LOOP_LENGTH x BEAM_WIDTH by the constructor
133
+ const int BEAM_WIDTH, LOOP_LENGTH; // fixed at construction from the constructor's two int arguments (w, l)
134
+ Frame frameLast; // most recent frame; starts at LOOP_LENGTH and is incremented by every update()
135
+ int iNextNode; // NOTE(review): never initialised or read in the visible code -- possibly dead
136
+ public:
137
+ // Static member variables (all defined as false below; not read anywhere in the visible code)...
138
+ static bool OUTPUT_QUIET;
139
+ static bool OUTPUT_NOISY;
140
+ static bool OUTPUT_VERYNOISY;
141
+ // static int BEAM_WIDTH;
142
+ // Constructor: (argc, argv, beam width, loop length, initial state). Loads models, then decodes stdin until exhausted...
143
+ HMMLoop ( int, const char*[], int, int, const S& ) ;
144
+ // Specification methods...
145
+ // void init ( int, int, const S& ) ;
146
+ // void init ( int, int, SafeArray1D<Id<int>,pair<S,LogProb> >* );
147
+ const TrellNode<S,B>& update ( const typename MX::RandVarType& ) ;
148
+ const TrellNode<S,B>& getTrellNode ( Frame t, Index i ) { return aatnTrellis.get(t%LOOP_LENGTH,i); } // read access; frame wraps modulo LOOP_LENGTH
149
+ TrellNode<S,B>& setTrellNode ( Frame t, Index i ) { return aatnTrellis.set(t%LOOP_LENGTH,i); } // write access; frame wraps modulo LOOP_LENGTH
150
+ // The declarations inside the block comment below are disabled; none of them is defined in the visible code.
151
+ /*
152
+ void updateSerial ( const typename MX::RandVarType& ) ;
153
+ void updatePara ( const typename MX::RandVarType& ) ;
154
+ void each ( const typename MX::RandVarType&, Beam<LogProb,S,IB>&, SafeArray1D<Id<int>,std::pair<std::pair<S,IB>,LogProb> >& ) ;
155
+ // Extraction methods...
156
+ const TrellNode<S,B>& getTrellNode ( int i ) const { return aatnTrellis.get(frameLast,i); }
157
+ int getBeamUsed ( int ) const ;
158
+ // Input / output methods...
159
+ void writeMLS ( FILE* ) const ;
160
+ void writeMLS ( FILE*, const S& ) const ;
161
+ double getCurrSum(int) const;
162
+ void writeCurr ( FILE*, int ) const ;
163
+ void writeCurrSum ( FILE*, int ) const ;
164
+ void gatherElementsInBeam( SafeArray1D<Id<int>,pair<S,LogProb> >* result, int f ) const;
165
+ void writeCurrEntropy ( FILE*, int ) const;
166
+ //void writeCurrDepths ( FILE*, int ) const;
167
+ void writeFoll ( FILE*, int, int, const typename MX::RandVarType& ) const ;
168
+ void writeFollRanked ( FILE*, int, int, const typename MX::RandVarType&, bool ) const ;
169
+ std::list<string> getMLS() const;
170
+ std::list<TrellNode<S,B> > getMLSnodes() const;
171
+ std::list<string> getMLS(const S&) const;
172
+ std::list<TrellNode<S,B> > getMLSnodes(const S&) const;
173
+ */
174
+ };
175
+ template <class MY, class MX, class S, class B> bool HMMLoop<MY,MX,S,B>::OUTPUT_QUIET = false; // one definition per template instantiation
176
+ template <class MY, class MX, class S, class B> bool HMMLoop<MY,MX,S,B>::OUTPUT_NOISY = false;
177
+ template <class MY, class MX, class S, class B> bool HMMLoop<MY,MX,S,B>::OUTPUT_VERYNOISY = false;
178
+ //template <class MY, class MX, class S, class B> int HMMLoop<MY,MX,S,B>::BEAM_WIDTH = 1;
179
+
180
+
181
+ ////////////////////////////////////////////////////////////////////////////////
182
+ ////////////////////////////////////////////////////////////////////////////////
183
+
184
+ template <class MY, class MX, class S, class B>
185
+ HMMLoop<MY,MX,S,B>::HMMLoop ( int nArgs, const char* apsArgs[], int w, int l, const S& sInit ) : BEAM_WIDTH(w), LOOP_LENGTH(l) {
186
+
187
+ // For each model file in command line arguments...
188
+ for ( int iArg=1; iArg<nArgs; iArg++ ) {
189
+
190
+ // Try to open model file...
191
+ FILE* pf = fopen(apsArgs[iArg],"r");
192
+ // Complain if can't open model file...
193
+ if ( NULL == pf ) {
194
+ cout<<"ERROR: can't open file '"<<apsArgs[iArg]<<"'!\n";
195
+ cout<<"Terminating process with failure code 1.\n";
196
+ exit(1);
197
+ }
198
+
199
+ // Initialize stream buffer and line number...
200
+ IStreamSource iss(pf);
201
+ int linenum=0;
202
+
203
+ cout<<"Reading file '"<<apsArgs[iArg]<<"'...\n";
204
+
205
+ // For each line of input...
206
+ for ( IStream is(iss),is1; IStream()!=is; is=is1,iss.compress() ) {
207
+
208
+ // Increment line number...
209
+ linenum++;
210
+ // Count off every 100K lines...
211
+ if (linenum%100000==0) cout<<" Reading line "<<linenum<<"...\n";
212
+
213
+ // Try to read each line into each model...
214
+ String s;
215
+ if ( (is1=(is>>"#">>s>>"\n")) == IStream() &&
216
+ (is1=(is>>modY>> "\n")) == IStream() &&
217
+ (is1=(is>>modX>> "\n")) == IStream() &&
218
+ (is1=(is>>s >> "\n")) != IStream() )
219
+ // Complain if bad format...
220
+ cout<<" ERROR in '"<<apsArgs[iArg]<<"', line "<<linenum<<": can't process '"<<s<<"'!\n";
221
+ }
222
+ cout<<"Done reading file '"<<apsArgs[iArg]<<"'.\n";
223
+ fclose(pf);
224
+ }
225
+ cout<<"Done reading all model files.\n";
226
+ //modY.dump(cout,"Y");
227
+ //modX.dump(cout,"X");
228
+
229
+ // Alloc trellis...
230
+ aatnTrellis.init(LOOP_LENGTH,BEAM_WIDTH);
231
+ frameLast=LOOP_LENGTH;
232
+ // Set initial element at first time slice...
233
+ setTrellNode(frameLast,0) = TrellNode<S,B> ( Index(0), sInit, B(), 0 ) ;
234
+
235
+ cout<<"Begin processing input...\n";
236
+ IStreamSource iss(stdin);
237
+ typename MX::RandVarType x;
238
+
239
+ // For each frame...
240
+ for ( IStream is(iss); is!=IStream(); iss.compress() ) {
241
+
242
+ // // Show beam...
243
+ // cout<<"-----BEAM:t="<<frameLast-LOOP_LENGTH<<"-----\n";
244
+ // for(int i=0;i<BEAM_WIDTH;i++)
245
+ // cout<<getTrellNode(frameLast,i)<<"\n";
246
+ // cout<<"--------------\n";
247
+
248
+ // Read spectrum (as frame audio)...
249
+ is=is>>x;
250
+
251
+ // // Show spectrum...
252
+ // cout<<frameLast-2*LOOP_LENGTH+1<<" "<<x<<"\n";
253
+ // // Show spectrum with bin numbers...
254
+ // cout<<frameLast-2*LOOP_LENGTH+1;
255
+ // for(int i=0; i<NUM_FREQUENCIES; i++)
256
+ // cout<<((i==0)?' ':',')<<i<<":"<<x.get(i);
257
+ // cout<<"\n";
258
+
259
+ // Update trellis...
260
+ const TrellNode<S,B>& tn = update(x);
261
+
262
+ // Show recognized hidden variable values...
263
+ cout<<frameLast-2*LOOP_LENGTH+1<<":'"<<tn<<"'\n";
264
+ cout.flush();
265
+ }
266
+ cout<<"Done processing input.\n";
267
+ }
268
+
269
+
270
+ ////////////////////////////////////////////////////////////////////////////////
271
+
272
+ template <class A, class B>
273
+ inline bool outRank ( const quad<A,B,LogProb,Id<int> >& a1,
274
+ const quad<A,B,LogProb,Id<int> >& a2 ) { return (a1.third>a2.third); }
275
+ // update(x): advances the trellis by one frame for observation x, then returns the node LOOP_LENGTH-1 frames back that the best current hypothesis descends from.
276
+ template <class MY, class MX, class S, class B>
277
+ const TrellNode<S,B>& HMMLoop<MY,MX,S,B>::update ( const typename MX::RandVarType& x ) {
278
+
279
+ // Increment frame counter...
280
+ frameLast++;
281
+
282
+ // Init beam for new frame...
283
+ Beam<LogProb,S,IB> btn(BEAM_WIDTH);
284
+ SafeArray1D<Id<int>,std::pair<std::pair<S,IB>,LogProb> > atnSorted (BEAM_WIDTH);
285
+
286
+ typedef quad<int,typename MY::IterVal,LogProb,Id<int> > SHPI; // (source beam index, MY iterator state, score so far, variable position forked at)
287
+ Heap < SHPI, outRank<int,typename MY::IterVal > > ashpiQueue;
288
+ SHPI shpi, shpiTop;
289
+ int aCtr;
290
+ // Seed the queue with beam slot 0 of the previous frame, positioned before the first variable (fourth = -1)...
291
+ ashpiQueue.clear();
292
+ //shpi.first = -1;
293
+ //shpi.second = YModel::IterVal();
294
+ //shpi.third = 1.0;
295
+ shpi.first = 0;
296
+ shpi.third = getTrellNode(frameLast-1,shpi.first).getScore();
297
+ shpi.third *= modY.setIterProb ( shpi.second, getTrellNode(frameLast-1,shpi.first).getId(), aCtr=-1 ); // , x, aCtr=-1 );
298
+ //S s; modY.setTrellDat(s,shpi.second);
299
+ shpi.fourth = -1;
300
+ ////cerr<<"????? "<<shpi<<"\n";
301
+ ashpiQueue.enqueue(shpi);
302
+
303
+ bool bFull=false;
304
+ // bFull becomes true once btn.tryAdd() returns true; that, or an empty queue, ends the search...
305
+ // For each ranked value of transition destination...
306
+ for ( int iTrg=0; !bFull && ashpiQueue.getSize()>0; iTrg++ ) {
307
+ // Iterate A* (best-first) search until a complete path is at the top of the queue...
308
+ while ( ashpiQueue.getSize() > 0 && ashpiQueue.getTop().fourth < MY::IterVal::NUM_ITERS ) {
309
+ // Remove top...
310
+ shpiTop = ashpiQueue.dequeueTop();
311
+ // Fork off (try to advance each elementary variable a)...
312
+ for ( int a=shpiTop.fourth.toInt(); a<=MY::IterVal::NUM_ITERS; a++ ) {
313
+ // Copy top into new queue element...
314
+ shpi = shpiTop;
315
+ // At variable position -1, advance beam element for transition source...
316
+ if ( a == -1 ) shpi.first++; // NOTE(review): nothing here keeps shpi.first below BEAM_WIDTH -- confirm getTrellNode/SafeArray2D copes with it
317
+ // Incorporate prob from transition source...
318
+ shpi.third = getTrellNode(frameLast-1,shpi.first).getScore();
319
+ if ( shpi.third > LogProb() ) { // LogProb() presumably denotes zero probability -- TODO confirm
320
+ // Try to advance variable at position a and return probability (subsequent variables set to first, probability ignored)...
321
+ shpi.third *= modY.setIterProb ( shpi.second, getTrellNode(frameLast-1,shpi.first).getId(), aCtr=a ); // , x, aCtr=a );
322
+ // At end of variables, incorporate observation probability...
323
+ if ( a == MY::IterVal::NUM_ITERS && shpi.fourth != MY::IterVal::NUM_ITERS )
324
+ shpi.third *= modX.getProb ( x, S(shpi.second) );
325
+ //// { S s; modY.setTrellDat(s,shpi.second); shpi.third *= modX.getProb(x,s); }
326
+ // Record variable position at which this element was forked off...
327
+ shpi.fourth = a;
328
+ //cerr<<" from partial: "<<shpiTop<<"\n to partial: "<<shpi<<"\n";
329
+ if ( shpi.third > LogProb() ) {
330
+ ////if ( frameLast == 4 ) cerr<<" from partial: "<<shpiTop<<"\n to partial: "<<shpi<<"\n";
331
+ // If valid, add to queue...
332
+ ashpiQueue.enqueue(shpi);
333
+ //cerr<<"--------------------\n"<<ashpiQueue;
334
+ }
335
+ }
336
+ }
337
+ // Remove top...
338
+ //cerr<<"/-----A-----\\\n"<<ashpiQueue<<"\\-----A-----/\n";
339
+ //if ( ashpiQueue.getTop().fourth != MY::IterVal::NUM_ITERS ) ashpiQueue.dequeueTop();
340
+ ////cerr<<"/-----B-----\\\n"<<ashpiQueue<<"\\-----B-----/\n";
341
+ ////cerr<<ashpiQueue.getSize()<<" queue elems, "<<ashpiQueue.getTop()<<"\n";
342
+ }
343
+
344
+ ////cerr<<"-----*-----\n"<<ashpiQueue<<"-----*-----\n";
345
+ ////cerr<<ashpiQueue.getSize()<<" queue elems **\n";
346
+
347
+ // Add best transition (top of queue) to the beam, then drop it from the queue...
348
+ //modX.getProb(o,modY.setTrellDat(ashpiQueue.getTop().first,ashpiQueue.getTop().second));
349
+ if ( ashpiQueue.getSize() > 0 ) {
350
+ S s ( ashpiQueue.getTop().second );
351
+ ////S s; modY.setTrellDat(s,ashpiQueue.getTop().second);
352
+ bFull |= btn.tryAdd ( s, IB(ashpiQueue.getTop().first,B(ashpiQueue.getTop().second)), ashpiQueue.getTop().third );
353
+ ////cerr<<ashpiQueue.getSize()<<" queue elems A "<<ashpiQueue.getTop()<<"\n";
354
+ ////cerr<<"/-----A-----\\\n"<<ashpiQueue<<"\\-----A-----/\n";
355
+ ashpiQueue.dequeueTop();
356
+ ////cerr<<"/-----B-----\\\n"<<ashpiQueue<<"\\-----B-----/\n";
357
+ ////cerr<<ashpiQueue.getSize()<<" queue elems B "<<ashpiQueue.getTop()<<"\n";
358
+ //cerr<<"."; cerr.flush();
359
+ }
360
+ }
361
+
362
+ ////cerr<<"-----*-----\n"<<ashpiQueue<<"-----*-----\n";
363
+
364
+ btn.sort(atnSorted);
365
+
366
+ // Copy sorted beam to trellis...
367
+ Index iOriginOfBest; // set from entry 0 of the sorted beam; stays default-constructed if BEAM_WIDTH is 0
368
+ int j=0; // next free slot in the new frame
369
+ for(int i=0;i<BEAM_WIDTH;i++) {
370
+ const std::pair<std::pair<S,IB>,LogProb>* tn1 = &atnSorted.get(i);
371
+ Index iOrigin = tn1->first.second.first;
372
+ // Follow source links back to find this hypothesis's ancestor at frame frameLast-LOOP_LENGTH+1...
373
+ for ( Frame t=frameLast-1; t>frameLast-LOOP_LENGTH+1; t-- )
374
+ iOrigin = getTrellNode(t,iOrigin).getSource();
375
+ if ( 0 == i ) iOriginOfBest = iOrigin;
376
+ // Keep only hypotheses that share the best hypothesis's ancestor; the others are dropped...
377
+ if ( iOriginOfBest == iOrigin ) {
378
+ setTrellNode(frameLast,j++)=TrellNode<S,B>(tn1->first.second.first,
379
+ tn1->first.first,
380
+ tn1->first.second.second,
381
+ tn1->second);
382
+ }
383
+ }
384
+ // Overwrite the unused tail of the beam with default-constructed nodes...
385
+ for ( ; j<BEAM_WIDTH; j++ )
386
+ setTrellNode(frameLast,j) = TrellNode<S,B>();
387
+
388
+ ////modY.update();
389
+
390
+ return getTrellNode(frameLast-LOOP_LENGTH+1,iOriginOfBest);
391
+ }
392
+
393
+
394
+
395
+
396
+ #endif //_NL_HMMLOOP_
397
+
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-iomacros.h ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ///////////////////////////////////////////////////////////////////////////////
2
+ // //
3
+ // This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
4
+ // //
5
+ // ModelBlocks is free software: you can redistribute it and/or modify //
6
+ // it under the terms of the GNU General Public License as published by //
7
+ // the Free Software Foundation, either version 3 of the License, or //
8
+ // (at your option) any later version. //
9
+ // //
10
+ // ModelBlocks is distributed in the hope that it will be useful, //
11
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of //
12
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
13
+ // GNU General Public License for more details. //
14
+ // //
15
+ // You should have received a copy of the GNU General Public License //
16
+ // along with ModelBlocks. If not, see <http://www.gnu.org/licenses/>. //
17
+ // //
18
+ // ModelBlocks developers designate this particular file as subject to //
19
+ // the "Moses" exception as provided by ModelBlocks developers in //
20
+ // the LICENSE file that accompanies this code. //
21
+ // //
22
+ ///////////////////////////////////////////////////////////////////////////////
23
+
24
+ #ifndef NL_IO_MACROS__
25
+ #define NL_IO_MACROS__
26
+
27
+ //#include <sys/types.h>
28
+ //#include <sys/socket.h>
29
+ //#include <netinet/in.h>
30
+ //#include <netdb.h>
31
+ // In every CONSUME_* macro below, the line counter l is incremented when the character being consumed (the old value of c) is '\n'.
32
+ // Character-class predicates. Arguments are not parenthesised in the expansions, so pass simple expressions only.
33
+ #define NUM(c) ((c>='0' && c<='9'))
34
+ #define ALPHANUM(c) ((c>='a' && c<='z') || (c>='A' && c<='Z') || (c>='0' && c<='9'))
35
+ #define SPACE(c) (c==' ')
36
+ #define WHITESPACE(c) (c==' ' || c=='\t' || c=='\n')
37
+ // CONSUME_OPT: if b holds, replace c with the next character of FILE* f; otherwise leave c unchanged.
38
+ #define CONSUME_OPT(f,c,b,l) c = (b) ? getc(f)+0*(l+=(c=='\n')?1:0) : c
39
+ // CONSUME_ONE[_STDIN]: like CONSUME_OPT, but when b is false an error naming s and line l is printed to stderr and c is left unchanged.
40
+ #define CONSUME_ONE(f,c,b,s,l) c = (b) ? getc(f)+0*(l+=(c=='\n')?1:0) : c+(0*fprintf(stderr,"\nERROR: %s in line %d (char=%c).\n\n",s,l,c))
41
+ #define CONSUME_ONE_STDIN(c,b,s,l) c = (b) ? getchar()+0*(l+=(c=='\n')?1:0) : c+(0*fprintf(stderr,"\nERROR: %s in line %d.\n\n",s,l))
42
+ // CONSUME_ALL[_STDIN]: a for-header that keeps reading into c while b holds; the statement following the macro is the loop body.
43
+ #define CONSUME_ALL(f,c,b,l) for ( ; b; c=getc(f)+0*(l+=(c=='\n')?1:0) )
44
+ #define CONSUME_ALL_STDIN(c,b,l) for ( ; b; c=getchar()+0*(l+=(c=='\n')?1:0) )
45
+ // CONSUME_STR*: copy characters into s while b holds, then write a '\0' terminator (i ends one past it). NOTE(review): CONSUME_STR_SAFE stops at i==m-1 without writing the terminator.
46
+ #define CONSUME_STR(f,c,b,s,i,l) for ( i=0; (b) || false != (s[i++]='\0'); s[i++]=c, c=getc(f)+0*(l+=(c=='\n')?1:0) )
47
+ #define CONSUME_STR_SAFE(f,c,b,s,i,m,l) for ( i=0; i<m-1&&((b)||false!=(s[i++]='\0')); s[i++]=c, c=getc(f)+0*(l+=(c=='\n')?1:0) )
48
+ #define CONSUME_STR_STDIN(c,b,s,i,l) for ( i=0; (b) || false != (s[i++]='\0'); s[i++]=c, c=getchar()+0*(l+=(c=='\n')?1:0) )
49
+ // CONSUME_INT[_STDIN]: reset i to 0, then accumulate consecutive decimal digits (starting with the current c) into i.
50
+ #define CONSUME_INT(f,c,i,l) for ( i=0; (c>='0' && c<='9'); i=(i*10)+(c-'0'), c=getc(f)+0*(l+=(c=='\n')?1:0) )
51
+ #define CONSUME_INT_STDIN(c,i,l) for ( i=0; (c>='0' && c<='9'); i=(i*10)+(c-'0'), c=getchar()+0*(l+=(c=='\n')?1:0) )
52
+ // CONSUME_DEC: add fractional digits to i using the growing divisor j (10, 100, ...); i is not reset. Presumably i and j must be floating-point, since (c-'0')/j truncates for integer j -- confirm at call sites.
53
+ #define CONSUME_DEC(f,c,i,j,l) for ( j=1; (c>='0' && c<='9'); j*=10, i+=(c-'0')/j, c=getc(f)+0*(l+=(c=='\n')?1:0) )
54
+ // CONSUME_HEX: reset i to 0, then accumulate hex digits into i; only 0-9 and lowercase a-f are accepted.
55
+ #define CONSUME_HEX(f,c,i,l) for ( i=0; (c>='0' && c<='9') || (c>='a' && c<='f'); i=(i*16)+((c<'a')?c-'0':c+10-'a'), c=getc(f)+0*(l+=(c=='\n')?1:0) )
56
+ // Socket variants read one byte at a time with recv(...,MSG_WAITALL) and yield '\0' when recv does not return 1; they need the socket headers that are commented out above.
57
+ #define CONSUME_ALL_SOCKET(f,c,b,l) for ( char s[1]; b; c=((recv(f,&s[0],1,MSG_WAITALL)==1) ? s[0]+0*(l+=(c=='\n')?1:0) : '\0') )
58
+ #define CONSUME_STR_SOCKET(f,c,b,s,i,l) for ( i=0; (b) || ('\0'!=(s[i++]='\0')); s[i++]=c, c=((recv(f,&s[i],1,MSG_WAITALL)==1) ? s[i]+0*(l+=(c=='\n')?1:0) : (s[i]='\0')) )
59
+ // Disabled variants that would read from an in-memory string f:
60
+ //#define CONSUME_ALL_STRING(f,c,b,l) for ( int ii=0; b && f[ii]!='\0'; c=f[ii]+0*(l+=(c=='\n')?1:0), ii++ )
61
+ //#define CONSUME_STR_STRING(f,c,b,s,i,l) for ( i=0; (b && f[i]!='\0') || false != (s[i++]='\0'); s[i++]=c, c=f[i]+0*(l+=(c=='\n')?1:0) )
62
+
63
+ #endif // NL_IO_MACROS__
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-list.h ADDED
@@ -0,0 +1,481 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ///////////////////////////////////////////////////////////////////////////////
2
+ // //
3
+ // This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
4
+ // //
5
+ // ModelBlocks is free software: you can redistribute it and/or modify //
6
+ // it under the terms of the GNU General Public License as published by //
7
+ // the Free Software Foundation, either version 3 of the License, or //
8
+ // (at your option) any later version. //
9
+ // //
10
+ // ModelBlocks is distributed in the hope that it will be useful, //
11
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of //
12
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
13
+ // GNU General Public License for more details. //
14
+ // //
15
+ // You should have received a copy of the GNU General Public License //
16
+ // along with ModelBlocks. If not, see <http://www.gnu.org/licenses/>. //
17
+ // //
18
+ // ModelBlocks developers designate this particular file as subject to //
19
+ // the "Moses" exception as provided by ModelBlocks developers in //
20
+ // the LICENSE file that accompanies this code. //
21
+ // //
22
+ ///////////////////////////////////////////////////////////////////////////////
23
+
24
+ #ifndef _NL_LIST_ //////////////////////////////////////////////////////////////
25
+ #define _NL_LIST_ //////////////////////////////////////////////////////////////
26
+
27
+ #include <cstdlib>
28
+
29
+ #define Listed(x) ListedObject<x> // shorthand for the list node type wrapping element type x
30
+
31
+ ////////////////////////////////////////////////////////////////////////////////
32
+ //
33
+ // Container macros (for-headers over any container c that offers getNext(); the cursor p must be declared by the caller)
34
+ //
35
+ ////////////////////////////////////////////////////////////////////////////////
36
+
37
+ // Standard loop: p visits every element of c in order, starting from c.getNext(NULL); p is NULL when the loop runs to completion...
38
+ #define foreach(p,c) for ( p=(c).getNext(NULL); p!=NULL; p=(c).getNext(p) )
39
+
40
+ // True unless proven false: y ends up true iff x holds for every element (true for an empty c); stops after the first failure. x is evaluated in the increment clause, so an empty loop body suffices...
41
+ #define setifall(y,p,c,x) for ( p=(c).getNext(NULL), y=true; p!=NULL && y; y &= (x), p=(c).getNext(p) )
42
+
43
+ // False unless proven true: y ends up true iff x holds for some element (false for an empty c); stops after the first success. x is evaluated in the increment clause, so an empty loop body suffices...
44
+ #define setifexists(y,p,c,x) for ( p=(c).getNext(NULL), y=false; p!=NULL && !y; y |= (x), p=(c).getNext(p) )
45
+
46
+
47
+ ////////////////////////////////////////////////////////////////////////////////
48
+
49
// Ptr<T>: a thin, non-owning wrapper around a raw T*.  It never allocates or
// frees; copies share the same pointee.  All comparisons compare the stored
// addresses, not the pointed-to values.
template <class T>
class Ptr
{
 private:
  T* ptObj ;   // wrapped address; NULL for a default-constructed Ptr

 public:
  // Construction: empty, from an address, from an object (takes its address), or by copy.
  Ptr ( )                  : ptObj(NULL)     { }
  Ptr ( T* pt )            : ptObj(pt)       { }
  Ptr ( T& t )             : ptObj(&t)       { }
  Ptr ( const Ptr<T>& pt ) : ptObj(pt.ptObj) { }

  // Address comparisons.
  bool operator>  ( const Ptr<T>& pt ) const { return ptObj >  pt.ptObj; }
  bool operator<  ( const Ptr<T>& pt ) const { return ptObj <  pt.ptObj; }
  bool operator>= ( const Ptr<T>& pt ) const { return ptObj >= pt.ptObj; }
  bool operator<= ( const Ptr<T>& pt ) const { return ptObj <= pt.ptObj; }
  bool operator== ( const Ptr<T>& pt ) const { return ptObj == pt.ptObj; }
  bool operator!= ( const Ptr<T>& pt ) const { return ptObj != pt.ptObj; }

  // Rebinding: afterwards this Ptr refers to the same object as pt.
  Ptr<T>& operator= ( const Ptr<T>& pt ) {
    ptObj = pt.ptObj;
    return *this;
  }

  // Access to the pointee; no NULL check is performed.
  T& operator*  ( ) const { return *ptObj; }
  T* operator-> ( ) const { return ptObj; }
} ;
69
+
70
+
71
+ ////////////////////////////////////////////////////////////////////////////////
72
+
73
+ template <class T>
74
+ class ListedObject ;
75
+
76
+ template <class T>
77
+ class List { // singly linked circular list of ListedObject<T> nodes, tracked through its tail pointer
78
+ private:
79
+
80
+ ListedObject<T>* plotLast ; // tail node (its plotNext is the head); NULL when the list is empty
81
+
82
+ public:
83
+
84
+ typedef ListedObject<T>* iterator;
85
+ typedef const ListedObject<T>* const_iterator; // NOTE(review): a plain pointer, so ++ does pointer arithmetic rather than following plotNext; traverse with getNext()/foreach
86
+
87
+ // Constructor and destructor methods: empty, single element, copy, concatenation of two lists...
88
+ List ( ) ;
89
+ List ( const T& ) ;
90
+ List ( const List<T>& ) ;
91
+ List ( const List<T>&, const List<T>& ) ;
92
+ ~List ( ) ;
93
+
94
+ // Overloaded operators (== compares element by element with T's operator==)...
95
+ List<T>& operator= ( const List<T>& ) ;
96
+ List<T>& operator+= ( const List<T>& ) ;
97
+ bool operator== ( const List<T>& ) const ;
98
+ bool operator!= ( const List<T>& ) const ;
99
+
100
+ // Specification methods...
101
+ void clear ( ) ; // deletes every node
102
+ T& insert ( Listed(T)* ) ; // new default-constructed node after the given node, or at the head when passed NULL
103
+ void remove ( Listed(T)* ) ; // NOTE: declared only; the sole definition in this header is commented out
104
+ T& add ( ) ; // new default-constructed node at the tail
105
+ T& push ( ) ; // new default-constructed node at the head
106
+ void pop ( ) ; // deletes the head node
107
+ Listed(T)* setFirst ( ) ;
108
+ Listed(T)* setNext ( Listed(T)* ) ;
109
+
110
+ // Extraction methods...
111
+ const_iterator begin ( ) const { return getNext(NULL); }
112
+ const_iterator end ( ) const { return NULL; }
113
+ iterator& operator++ ( ) { *this=getNext(*this); return *this; } // NOTE(review): passes a List to getNext(), which takes a node pointer -- looks ill-formed if ever instantiated
114
+ int getCard ( ) const ; // number of elements, counted by traversal
115
+ Listed(T)* getFirst ( ) const ;
116
+ Listed(T)* getSecond( ) const ;
117
+ Listed(T)* getLast ( ) const ;
118
+ Listed(T)* getNext ( const Listed(T)* ) const ; // successor of the given node; NULL argument yields the head, tail yields NULL
119
+ bool contains ( const T& ) const ;
120
+ bool isEmpty ( ) const ;
121
+
122
+ /* // Input / output methods... */
123
+ /* friend IStream operator>> ( pair<IStream,List<T>*> is_x, const char* psDlm ) { */
124
+ /* IStream& is = is_x.first; */
125
+ /* List<T>& x = *is_x.second; */
126
+ /* if (IStream()!=is) */
127
+ /* is = pair<IStream,T*>(is,&x.add())>>psDlm; */
128
+ /* return is; */
129
+ /* } */
130
+ } ;
131
+
132
+ ////////////////////////////////////////////////////////////////////////////////
133
+
134
template <class T> class List ;   // redeclaration of the list template befriended below

// ListedObject<T>: a T payload extended with the `next` link that List<T> uses
// to chain nodes.  Only List<T> (a friend) may change the link.
// Copying or assigning a node transfers the T payload only: a copy-constructed
// node starts unlinked (next() == NULL) and an assigned-to node keeps its own
// position in whatever list it already belongs to.
template <class T>
class ListedObject : public T
{
  friend class List<T> ;

 private:

  ListedObject<T>* plotNext ;   // successor node; NULL while the node is not linked

 public:

  // Successor of this node (read-only view).
  const ListedObject<T>* next ( ) const { return plotNext; }

  // New unlinked node with a default-constructed payload.
  ListedObject ( ) { plotNext = NULL; }

  // New unlinked node whose payload is assigned from lot.  The link is set to
  // NULL explicitly; previously it was left uninitialised by this constructor.
  ListedObject ( const ListedObject<T>& lot ) : plotNext(NULL)
    { T::operator=(lot); }

  // Payload assignment; the link of *this is deliberately left untouched.
  ListedObject<T>& operator= ( const ListedObject<T>& lot )
    { T::operator=(lot); return(*this); }

  operator T() { return *this; }
} ;
153
+
154
+
155
+ ////////////////////////////////////////////////////////////////////////////////
156
+
157
+ template <class T>
158
+ List<T>::List ( )
159
+ {
160
+ plotLast = NULL ;
161
+ }
162
+
163
+ ////////////////////////////////////////////////////////////////////////////////
164
+
165
+ template <class T>
166
+ List<T>::List ( const T& t )
167
+ {
168
+ plotLast = NULL ;
169
+
170
+ add() = t ;
171
+ }
172
+
173
+ ////////////////////////////////////////////////////////////////////////////////
174
+
175
+ template <class T>
176
+ List<T>::List ( const List<T>& lt )
177
+ {
178
+ ListedObject<T>* pt ;
179
+
180
+ plotLast = NULL ;
181
+
182
+ foreach ( pt, lt )
183
+ add() = *pt ;
184
+ }
185
+
186
+ ////////////////////////////////////////////////////////////////////////////////
187
+
188
+ template <class T>
189
+ List<T>::List ( const List<T>& lt1, const List<T>& lt2 )
190
+ {
191
+ ListedObject<T>* pt ;
192
+
193
+ plotLast = NULL ;
194
+
195
+ foreach ( pt, lt1 )
196
+ add() = *pt ;
197
+ foreach ( pt, lt2 )
198
+ add() = *pt ;
199
+ }
200
+
201
+ ////////////////////////////////////////////////////////////////////////////////
202
+
203
+ template <class T>
204
+ List<T>::~List ( )
205
+ {
206
+ clear();
207
+ }
208
+
209
+ ////////////////////////////////////////////////////////////////////////////////
210
+
211
+ template <class T>
212
+ void List<T>::clear ( )
213
+ {
214
+ ListedObject<T>* plot ;
215
+ ListedObject<T>* plot2 ;
216
+ if ( NULL != (plot = plotLast) )
217
+ do { plot2 = plot->plotNext ;
218
+ ////fprintf(stderr,"list::destr %x\n",plot);
219
+ delete plot ;
220
+ } while ( plotLast != (plot = plot2) ) ;
221
+ plotLast = NULL ;
222
+ }
223
+
224
+ ////////////////////////////////////////////////////////////////////////////////
225
+
226
+ template <class T>
227
+ List<T>& List<T>::operator= ( const List<T>& lt )
228
+ {
229
+ Listed(T)* pt ;
230
+
231
+ this->~List ( ) ;
232
+ plotLast = NULL ;
233
+
234
+ foreach ( pt, lt )
235
+ add() = *pt ;
236
+
237
+ return *this ;
238
+ }
239
+
240
+ ////////////////////////////////////////////////////////////////////////////////
241
+
242
+ template <class T>
243
+ List<T>& List<T>::operator+= ( const List<T>& lt )
244
+ {
245
+ Listed(T)* pt ;
246
+
247
+ foreach ( pt, lt )
248
+ add() = *pt ;
249
+
250
+ return *this ;
251
+ }
252
+
253
+ ////////////////////////////////////////////////////////////////////////////////
254
+
255
+ template <class T>
256
+ bool List<T>::operator== ( const List<T>& lt ) const
257
+ {
258
+ Listed(T)* pt1 ;
259
+ Listed(T)* pt2 ;
260
+
261
+ for ( pt1 = getNext(NULL), pt2 = lt.getNext(NULL);
262
+ pt1 != NULL && pt2 != NULL ;
263
+ pt1 = getNext(pt1), pt2 = lt.getNext(pt2) )
264
+ if ( !(*pt1 == *pt2) ) return false ;
265
+
266
+ return ( pt1 == NULL && pt2 == NULL ) ;
267
+ }
268
+
269
+ ////////////////////////////////////////////////////////////////////////////////
270
+
271
+ template <class T>
272
+ bool List<T>::operator!= ( const List<T>& lt ) const
273
+ {
274
+ return !(*this == lt) ;
275
+ }
276
+
277
+ ////////////////////////////////////////////////////////////////////////////////
278
+
279
+ template <class T>
280
+ T& List<T>::insert ( Listed(T)* plotPrev )
281
+ {
282
+ ListedObject<T>* plot = new ListedObject<T> ;
283
+ ////fprintf(stderr,"list::const %x\n",plot);
284
+
285
+ if ( NULL != plotPrev )
286
+ {
287
+ plot->plotNext = plotPrev->plotNext ;
288
+ plotPrev->plotNext = plot ;
289
+ if ( plotLast == plotPrev )
290
+ plotLast = plot ;
291
+ }
292
+ else if ( NULL != plotLast )
293
+ {
294
+ plot->plotNext = plotLast->plotNext ;
295
+ plotLast->plotNext = plot ;
296
+ }
297
+ else
298
+ {
299
+ plot->plotNext = plot ;
300
+ plotLast = plot ;
301
+ }
302
+
303
+ return *plot ;
304
+ }
305
+
306
+ ////////////////////////////////////////////////////////////////////////////////
307
+
308
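+ /* DISABLED: DOESN'T WORK. Likely cause: `*plot = *(plot->plotNext)` goes through ListedObject::operator=, which copies only the T payload, so plot->plotNext still points at the node that is then deleted.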
309
+ template <class T>
310
+ void List<T>::remove ( Listed(T)* plot )
311
+ {
312
+ assert ( plot );
313
+ assert ( plotLast );
314
+ // If only one element...
315
+ if ( plot->plotNext == plot )
316
+ {
317
+ assert ( plotLast == plot );
318
+ plotLast = NULL;
319
+ fprintf(stderr,"list::delete1 %x\n",plot);
320
+ delete plot;
321
+ }
322
+ // If more than one element...
323
+ else
324
+ {
325
+ if ( plotLast == plot->plotNext ) plotLast = plot;
326
+ Listed(T)* plotTemp = plot->plotNext;
327
+ *plot = *(plot->plotNext);
328
+ fprintf(stderr,"list::delete2 %x\n",plotTemp);
329
+ delete plotTemp;
330
+ }
331
+ }
332
+ */
333
+
334
+ ////////////////////////////////////////////////////////////////////////////////
335
+
336
+ template <class T>
337
+ T& List<T>::add ( )
338
+ {
339
+ ListedObject<T>* plot = new ListedObject<T> ;
340
+ ////fprintf(stderr,"list::add %x\n",plot);
341
+
342
+ if ( NULL != plotLast )
343
+ {
344
+ plot->plotNext = plotLast->plotNext ;
345
+ plotLast->plotNext = plot ;
346
+ plotLast = plot ;
347
+ }
348
+ else
349
+ {
350
+ plot->plotNext = plot ;
351
+ plotLast = plot ;
352
+ }
353
+
354
+ return *plot ;
355
+ }
356
+
357
+ ////////////////////////////////////////////////////////////////////////////////
358
+
359
+ template <class T>
360
+ T& List<T>::push ( )
361
+ {
362
+ ListedObject<T>* plot = new ListedObject<T> ;
363
+ ////fprintf(stderr,"list::push %x\n",plot);
364
+
365
+ if ( NULL != plotLast )
366
+ {
367
+ plot->plotNext = plotLast->plotNext ;
368
+ plotLast->plotNext = plot ;
369
+ }
370
+ else
371
+ {
372
+ plot->plotNext = plot ;
373
+ plotLast = plot ;
374
+ }
375
+
376
+ return *plot ;
377
+ }
378
+
379
+ ////////////////////////////////////////////////////////////////////////////////
380
+
381
+ template <class T>
382
+ void List<T>::pop ( )
383
+ {
384
+ ListedObject<T>* plot = plotLast->plotNext ;
385
+
386
+ if ( plot->plotNext == plot )
387
+ plotLast = NULL ;
388
+ else
389
+ plotLast->plotNext = plot->plotNext ;
390
+
391
+ ////fprintf(stderr,"list::pop %x\n",plot);
392
+ delete plot ;
393
+ }
394
+
395
+ ////////////////////////////////////////////////////////////////////////////////
396
+
397
+ template <class T>
398
+ int List<T>::getCard ( ) const
399
+ {
400
+ Listed(T)* pt ;
401
+ int i = 0 ;
402
+
403
+ foreach ( pt, *this )
404
+ i++ ;
405
+
406
+ return i ;
407
+ }
408
+
409
+ ////////////////////////////////////////////////////////////////////////////////
410
+
411
+ template <class T>
412
+ ListedObject<T>* List<T>::setFirst ( )
413
+ {
414
+ return ( NULL != plotLast ) ? plotLast->plotNext : NULL ;
415
+ }
416
+
417
+ ////////////////////////////////////////////////////////////////////////////////
418
+
419
+ template <class T>
420
+ ListedObject<T>* List<T>::setNext ( ListedObject<T>* plot )
421
+ {
422
+ return ( NULL == plot && NULL != plotLast ) ? plotLast->plotNext :
423
+ ( plot != plotLast ) ? plot->plotNext : NULL ;
424
+ }
425
+
426
+ ////////////////////////////////////////////////////////////////////////////////
427
+
428
+ template <class T>
429
+ ListedObject<T>* List<T>::getFirst ( ) const
430
+ {
431
+ return ( NULL != plotLast ) ? plotLast->plotNext : NULL ;
432
+ }
433
+
434
+ ////////////////////////////////////////////////////////////////////////////////
435
+
436
+ template <class T>
437
+ ListedObject<T>* List<T>::getSecond ( ) const
438
+ {
439
+ return getNext(getFirst()) ;
440
+ }
441
+
442
+ ////////////////////////////////////////////////////////////////////////////////
443
+
444
+ template <class T>
445
+ ListedObject<T>* List<T>::getLast ( ) const
446
+ {
447
+ return ( NULL != plotLast ) ? plotLast : NULL ;
448
+ }
449
+
450
+ ////////////////////////////////////////////////////////////////////////////////
451
+
452
+ template <class T>
453
+ ListedObject<T>* List<T>::getNext ( const ListedObject<T>* plot ) const
454
+ {
455
+ return ( NULL == plot && NULL != plotLast ) ? plotLast->plotNext :
456
+ ( plot != plotLast ) ? plot->plotNext : NULL ;
457
+ }
458
+
459
+ ////////////////////////////////////////////////////////////////////////////////
460
+
461
+ template <class T>
462
+ bool List<T>::contains ( const T& t ) const
463
+ {
464
+ ListedObject<T>* pt ;
465
+
466
+ foreach ( pt, *this )
467
+ if ( t == *pt ) return true ;
468
+
469
+ return false ;
470
+ }
471
+
472
+ ////////////////////////////////////////////////////////////////////////////////
473
+
474
+ template <class T>
475
+ bool List<T>::isEmpty ( ) const
476
+ {
477
+ return ( NULL == plotLast ) ;
478
+ }
479
+
480
+ #endif //_NL_LIST_ /////////////////////////////////////////////////////////////
481
+
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-matrix.h ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ///////////////////////////////////////////////////////////////////////////////
2
+ // //
3
+ // This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
4
+ // //
5
+ // ModelBlocks is free software: you can redistribute it and/or modify //
6
+ // it under the terms of the GNU General Public License as published by //
7
+ // the Free Software Foundation, either version 3 of the License, or //
8
+ // (at your option) any later version. //
9
+ // //
10
+ // ModelBlocks is distributed in the hope that it will be useful, //
11
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of //
12
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
13
+ // GNU General Public License for more details. //
14
+ // //
15
+ // You should have received a copy of the GNU General Public License //
16
+ // along with ModelBlocks. If not, see <http://www.gnu.org/licenses/>. //
17
+ // //
18
+ // ModelBlocks developers designate this particular file as subject to //
19
+ // the "Moses" exception as provided by ModelBlocks developers in //
20
+ // the LICENSE file that accompanies this code. //
21
+ // //
22
+ ///////////////////////////////////////////////////////////////////////////////
23
+
24
+ template<class I,class J,class P>
25
+ class SparseMatrix : public SimpleHash<I,SimpleHash<J,P> > {
26
+ public:
27
+
28
+ typedef SimpleHash<I,SimpleHash<J,P> > Parent;
29
+
30
+ //// Matrix / vector operator methods...
31
+ friend SparseMatrix<I,J,P> operator* ( const SparseMatrix<I,J,P>& a, const SparseMatrix<I,J,P>& b ) {
32
+ SparseMatrix mOut;
33
+ for ( typename Parent::const_iterator iit=a.begin(); iit!=a.end(); iit++ ) {
34
+ I i = iit->first;
35
+ for ( typename SimpleHash<J,P>::const_iterator kit=a.get(i).begin(); kit!=a.get(i).end(); kit++ ) {
36
+ I k = kit->first;
37
+ for ( typename SimpleHash<J,P>::const_iterator jit=b.get(k).begin(); jit!=b.get(k).end(); jit++ ) {
38
+ I j = jit->first;
39
+ if ( a.get(i).get(k)!=0 && b.get(k).get(j)!=0 )
40
+ mOut.set(i).set(j) += a.get(i).get(k) * b.get(k).get(j);
41
+ }
42
+ }
43
+ }
44
+ return mOut;
45
+ }
46
+ friend SparseMatrix<I,J,P> operator+ ( const SparseMatrix<I,J,P>& a, const SparseMatrix<I,J,P>& b ) {
47
+ SparseMatrix mOut;
48
+ for ( typename Parent::const_iterator iit=a.begin(); iit!=a.end(); iit++ ) {
49
+ I i = iit->first;
50
+ for ( typename SimpleHash<J,P>::const_iterator jit=a.get(i).begin(); jit!=a.get(i).end(); jit++ ) {
51
+ I j = jit->first;
52
+ mOut.set(i).set(j) = a.get(i).get(j);
53
+ }
54
+ }
55
+ for ( typename Parent::const_iterator iit=b.begin(); iit!=b.end(); iit++ ) {
56
+ I i = iit->first;
57
+ for ( typename SimpleHash<J,P>::const_iterator jit=b.get(i).begin(); jit!=b.get(i).end(); jit++ ) {
58
+ I j = jit->first;
59
+ mOut.set(i).set(j) += b.get(i).get(j);
60
+ }
61
+ }
62
+ return mOut;
63
+ }
64
+ friend SparseMatrix<I,J,P> operator- ( const SparseMatrix<I,J,P>& a, const SparseMatrix<I,J,P>& b ) {
65
+ SparseMatrix mOut;
66
+ for ( typename Parent::const_iterator iit=a.begin(); iit!=a.end(); iit++ ) {
67
+ I i = iit->first;
68
+ for ( typename SimpleHash<J,P>::const_iterator jit=a.get(i).begin(); jit!=a.get(i).end(); jit++ ) {
69
+ I j = jit->first;
70
+ mOut.set(i).set(j) = a.get(i).get(j);
71
+ }
72
+ }
73
+ for ( typename Parent::const_iterator iit=b.begin(); iit!=b.end(); iit++ ) {
74
+ I i = iit->first;
75
+ for ( typename SimpleHash<J,P>::const_iterator jit=b.get(i).begin(); jit!=b.get(i).end(); jit++ ) {
76
+ I j = jit->first;
77
+ mOut.set(i).set(j) -= b.get(i).get(j);
78
+ }
79
+ }
80
+ return mOut;
81
+ }
82
+ // Matrix + scalar operators...
83
+ friend SparseMatrix<I,J,P> operator+ ( const SparseMatrix<I,J,P>& a, const P& p ) {
84
+ SparseMatrix mOut;
85
+ for ( typename Parent::const_iterator iit=a.begin(); iit!=a.end(); iit++ ) {
86
+ I i = iit->first;
87
+ for ( typename SimpleHash<J,P>::const_iterator jit=a.get(i).begin(); jit!=a.get(i).end(); jit++ ) {
88
+ I j = jit->first;
89
+ mOut.set(i).set(j) = a.get(i).get(j) + p;
90
+ }
91
+ }
92
+ return mOut;
93
+ }
94
+ friend SparseMatrix<I,J,P> operator- ( const SparseMatrix<I,J,P>& a, const P& p ) {
95
+ SparseMatrix mOut;
96
+ for ( typename Parent::const_iterator iit=a.begin(); iit!=a.end(); iit++ ) {
97
+ I i = iit->first;
98
+ for ( typename SimpleHash<J,P>::const_iterator jit=a.get(i).begin(); jit!=a.get(i).end(); jit++ ) {
99
+ I j = jit->first;
100
+ mOut.set(i).set(j) = a.get(i).get(j) - p;
101
+ }
102
+ }
103
+ return mOut;
104
+ }
105
+ // Diagonal matrix of vector...
106
+ friend SparseMatrix<I,J,P> diag ( const SparseMatrix<I,J,P>& a ) {
107
+ SparseMatrix mOut;
108
+ for ( typename Parent::const_iterator iit=a.begin(); iit!=a.end(); iit++ ) {
109
+ I i = iit->first;
110
+ for ( typename SimpleHash<J,P>::const_iterator jit=a.get(i).begin(); jit!=a.get(i).end(); jit++ ) {
111
+ I j = jit->first;
112
+ assert(j==0); // must be vector
113
+ mOut.set(i).set(i) += a.get(i).get(j);
114
+ }
115
+ }
116
+ return mOut;
117
+ }
118
+ // Scalar inf-norm (max) of matrix / vector...
119
+ P infnorm ( ) const {
120
+ P pOut = 0; // sparse matrix assumes some values are zero, so this is default infnorm.
121
+ for ( typename Parent::const_iterator iit=Parent::begin(); iit!=Parent::end(); iit++ ) {
122
+ I i = iit->first;
123
+ for ( typename SimpleHash<J,P>::const_iterator jit=Parent::get(i).begin(); jit!=Parent::get(i).end(); jit++ ) {
124
+ I j = jit->first;
125
+ if ( Parent::get(i).get(j) > pOut ) pOut = Parent::get(i).get(j);
126
+ }
127
+ }
128
+ return pOut;
129
+ }
130
+
131
+ // Scalar one-norm (sum) of matrix / vector...
132
+ P onenorm ( ) const {
133
+ P sum=0;
134
+ for ( typename Parent::const_iterator iit=Parent::begin(); iit!=Parent::end(); iit++ ) {
135
+ I i = iit->first;
136
+ for ( typename SimpleHash<J,P>::const_iterator jit=Parent::get(i).begin(); jit!=Parent::get(i).end(); jit++ ) {
137
+ I j = jit->first;
138
+ sum += Parent::get(i).get(j);
139
+ }
140
+ }
141
+ return sum;
142
+ }
143
+
144
+ //// Input / output methods...
145
+ friend pair<StringInput,SparseMatrix<I,J,P>*> operator>> ( StringInput si, SparseMatrix<I,J,P>& m ) {
146
+ return pair<StringInput,SparseMatrix<I,J,P>*>(si,&m);
147
+ }
148
+ friend StringInput operator>> ( pair<StringInput,SparseMatrix<I,J,P>*> si_m, const char* psD ) {
149
+ if (StringInput(NULL)==si_m.first) return si_m.first;
150
+ StringInput si; I i,j; P p;
151
+ si=si_m.first>>i>>" : ">>j>>" = ">>p>>psD;
152
+ if ( si!=NULL ) si_m.second->set(i).set(j) = p;
153
+ return si;
154
+ }
155
+ friend ostream& operator<< ( ostream& os, const SparseMatrix<I,J,P>& m ) {
156
+ int ctr=0;
157
+ for ( typename Parent::const_iterator iit=m.begin(); iit!=m.end(); iit++ ) {
158
+ I i = iit->first;
159
+ for ( typename SimpleHash<J,P>::const_iterator jit=m.get(i).begin(); jit!=m.get(i).end(); jit++ ) {
160
+ I j = jit->first;
161
+ os<<((0==ctr++)?"":",")<<i<<":"<<j<<"="<<m.get(i).get(j);
162
+ }
163
+ }
164
+ return os;
165
+ }
166
+ friend String& operator<< ( String& str, const SparseMatrix<I,J,P>& m ) {
167
+ int ctr=0;
168
+ for ( typename Parent::const_iterator iit=m.begin(); iit!=m.end(); iit++ ) {
169
+ I i = iit->first;
170
+ for ( typename SimpleHash<J,P>::const_iterator jit=m.get(i).begin(); jit!=m.get(i).end(); jit++ ) {
171
+ I j = jit->first;
172
+ str<<((0==ctr++)?"":",")<<i<<j;
173
+ }
174
+ }
175
+ return str;
176
+ }
177
+ };
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-modelfile.h ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ///////////////////////////////////////////////////////////////////////////////
2
+ // //
3
+ // This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
4
+ // //
5
+ // ModelBlocks is free software: you can redistribute it and/or modify //
6
+ // it under the terms of the GNU General Public License as published by //
7
+ // the Free Software Foundation, either version 3 of the License, or //
8
+ // (at your option) any later version. //
9
+ // //
10
+ // ModelBlocks is distributed in the hope that it will be useful, //
11
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of //
12
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
13
+ // GNU General Public License for more details. //
14
+ // //
15
+ // You should have received a copy of the GNU General Public License //
16
+ // along with ModelBlocks. If not, see <http://www.gnu.org/licenses/>. //
17
+ // //
18
+ // ModelBlocks developers designate this particular file as subject to //
19
+ // the "Moses" exception as provided by ModelBlocks developers in //
20
+ // the LICENSE file that accompanies this code. //
21
+ // //
22
+ ///////////////////////////////////////////////////////////////////////////////
23
+
24
+ #ifndef _NL_MODEL_FILE__
25
+ #define _NL_MODEL_FILE__
26
+
27
+ #include "nl-string.h"
28
+ #include "nl-iomacros.h"
29
+
30
+ #include <netinet/in.h>
31
+
32
+ static bool OUTPUT_QUIET = false;
33
+
34
+ ///////////////////////////////////////////////////////////////////////////////
35
+
36
+ void processModelFilePtr ( FILE* pf, bool rF(Array<char*>&) ) {
37
+ int i=0; int numFields=0; int c=' '; int line=1;
38
+ CONSUME_ALL(pf,c,WHITESPACE(c),line); // Get to first record
39
+ while ( c!=EOF ) { // For each record
40
+ if ( c=='#' ) CONSUME_ALL(pf, c, c!='\n' && c!='\0', line ) ; // If comment, consume
41
+ else { // If no comment,
42
+ Array<char*> aps(100);
43
+ String psBuff(1000);
44
+ CONSUME_STR ( pf, c, (c!='\n' && c!='\0'), psBuff, i, line );
45
+
46
+ char* psT=NULL;
47
+ for(int i=0;true;i++) {
48
+ char* z = strtok_r ( (0==i)?psBuff.c_array():NULL, " :=", &psT );
49
+ if (!z) break;
50
+ aps[i]=z;
51
+ }
52
+
53
+ if ( !rF(aps) ) // Try to process fields, else complain
54
+ fprintf( stderr, "\nERROR: %d %d-arg %s in line %d\n\n", numFields, aps.size(), aps[0], line);
55
+ }
56
+ CONSUME_ALL(pf,c,WHITESPACE(c),line); // Consume whitespace
57
+ }
58
+ }
59
+
60
+ ///////////////////////////////////////////////////////////////////////////////
61
+
62
+ void processModelFile ( const char* ps, bool rF(Array<char*>&) ) {
63
+ FILE* pf;
64
+ if(!OUTPUT_QUIET) fprintf ( stderr, "Reading model file %s...\n", ps ) ;
65
+ if ( NULL == (pf=fopen(ps,"r")) ) // Complain if file not found
66
+ fprintf ( stderr, "\nERROR: file %s could not be opened.\n\n", ps ) ;
67
+ processModelFilePtr ( pf, rF );
68
+ fclose(pf);
69
+ if(!OUTPUT_QUIET) fprintf ( stderr, "Model file %s loaded.\n", ps ) ;
70
+ }
71
+
72
+ ///////////////////////////////////////////////////////////////////////////////
73
+
74
+ void processModelSocket ( const int tSockfd, int& c, bool rF(Array<char*>&) ) {
75
+ int i=0; int numFields=0; int line=1;
76
+ CONSUME_ALL_SOCKET(tSockfd,c,WHITESPACE(c),line); // Get to first record
77
+ while ( c!='\0' && c!='\5' ) { // For each record
78
+ if ( c=='#' ) CONSUME_ALL_SOCKET(tSockfd, c, (c!='\n' && c!='\0' && c!='\5'), line ) ; // If comment, consume
79
+ else { // If no comment,
80
+ Array<char*> aps(100);
81
+ String psBuff(1000);
82
+ CONSUME_STR_SOCKET ( tSockfd, c, (c!='\n' && c!='\0' && c!='\5'), psBuff, i, line );
83
+ ////cerr<<"|"<<psBuff.c_array()<<"|"<<endl;
84
+
85
+ char* psT=NULL;
86
+ for(int i=0;true;i++) {
87
+ char* z = strtok_r ( (0==i)?psBuff.c_array():NULL, " :=", &psT );
88
+ if (!z) break;
89
+ aps[i]=z;
90
+ }
91
+
92
+ if ( !rF(aps) ) // Try to process fields, else complain
93
+ fprintf( stderr, "\nERROR: %d-arg %s in line %d\n\n", numFields, aps[0], line);
94
+ }
95
+ CONSUME_ALL_SOCKET(tSockfd,c,WHITESPACE(c),line); // Consume whitespace
96
+ }
97
+ }
98
+
99
+ void processModelSocket ( const int tSockfd, bool rF(Array<char*>&) ) {
100
+ int c=' ';
101
+ processModelSocket ( tSockfd, c, rF );
102
+ }
103
+
104
+ ///////////////////////////////////////////////////////////////////////////////
105
+
106
+ /*
107
+ void processModelString ( String& sBuff, bool rF(Array<char*>&) ) {
108
+ if ('#'!=sBuff[0]) {
109
+ Array<char*> aps(100);
110
+ char* psT=NULL;
111
+ for(int i=0;true;i++) {
112
+ char* z = strtok_r ( (0==i)?sBuff.c_array():NULL, " :=", &psT );
113
+ if (!z) break;
114
+ aps[i]=z;
115
+ }
116
+ if ( !rF(aps) ) // Try to process fields, else complain
117
+ fprintf( stderr, "\nERROR: %d-arg %s in line %d\n\n", numFields, aps[0], line);
118
+ }
119
+ }
120
+ */
121
+
122
+ ///////////////////////////////////////////////////////////////////////////////
123
+
124
+ #endif //_NL_MODEL_FILE__
125
+
126
+
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-prob.h ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ///////////////////////////////////////////////////////////////////////////////
2
+ // //
3
+ // This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
4
+ // //
5
+ // ModelBlocks is free software: you can redistribute it and/or modify //
6
+ // it under the terms of the GNU General Public License as published by //
7
+ // the Free Software Foundation, either version 3 of the License, or //
8
+ // (at your option) any later version. //
9
+ // //
10
+ // ModelBlocks is distributed in the hope that it will be useful, //
11
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of //
12
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
13
+ // GNU General Public License for more details. //
14
+ // //
15
+ // You should have received a copy of the GNU General Public License //
16
+ // along with ModelBlocks. If not, see <http://www.gnu.org/licenses/>. //
17
+ // //
18
+ // ModelBlocks developers designate this particular file as subject to //
19
+ // the "Moses" exception as provided by ModelBlocks developers in //
20
+ // the LICENSE file that accompanies this code. //
21
+ // //
22
+ ///////////////////////////////////////////////////////////////////////////////
23
+
24
+ #ifndef _NL_PROB__
25
+ #define _NL_PROB__
26
+
27
+ #include "nl-safeids.h"
28
+ #include <math.h>
29
+ #include <limits.h>
30
+
31
+ ////////////////////////////////////////////////////////////////////////////////
32
+
33
+ typedef double PDFVal;
34
+ typedef double LogPDFVal;
35
+
36
+ ////////////////////////////////////////////////////////////////////////////////
37
+
38
+ class Prob {
39
+ private:
40
+ double gVal;
41
+
42
+ public:
43
+ Prob ( ) { gVal = 0.0; }
44
+ Prob (double d) { gVal = d; }
45
+ Prob (const char* ps) { gVal = atof(ps); }
46
+
47
+ operator double() const { return gVal; }
48
+ double toDouble() const { return gVal; }
49
+ Prob& operator+= ( const Prob p ) { gVal += p.gVal; return *this; }
50
+ Prob& operator-= ( const Prob p ) { gVal -= p.gVal; return *this; }
51
+ Prob& operator*= ( const Prob p ) { gVal *= p.gVal; return *this; }
52
+ Prob& operator/= ( const Prob p ) { gVal /= p.gVal; return *this; }
53
+
54
+ friend ostream& operator<< ( ostream& os, const Prob& pr ) { return os<<pr.toDouble(); }
55
+ friend String& operator<< ( String& str, const Prob& pr ) { return str<<pr.toDouble(); }
56
+ friend pair<StringInput,Prob*> operator>> ( StringInput si, Prob& n ) { return pair<StringInput,Prob*>(si,&n); }
57
+ friend StringInput operator>> ( pair<StringInput,Prob*> si_n, const char* psDlm ) {
58
+ double d=0.0; StringInput si=si_n.first>>d>>psDlm; *si_n.second=Prob(d); return si; }
59
+ };
60
+
61
+ ////////////////////////////////////////////////////////////////////////////////
62
+ ////////////////////////////////////////////////////////////////////////////////
63
+ //
64
+ // LogProb -- encapsulate min probability in sum operations
65
+ //
66
+ ////////////////////////////////////////////////////////////////////////////////
67
+
68
+ //#define MIN_LOG_PROB (1-MAXINT)
69
+ #define MIN_LOG_PROB INT_MIN
70
+
71
+ class LogProb : public Id<int> {
72
+ public:
73
+ // Constructor / destructor methods...
74
+ LogProb ( ) { set(MIN_LOG_PROB); }
75
+ LogProb ( int i ) { set(i); }
76
+ LogProb ( double d ) { set(int(100.0*log(d))); }
77
+ LogProb ( Prob d ) { set(int(100.0*log(d))); }
78
+ // Specification methods...
79
+ LogProb& operator+= ( const LogProb i ) { assert(false); return *this; }
80
+ LogProb& operator*= ( const LogProb i )
81
+ { if((toInt() == MIN_LOG_PROB) || (i.toInt() == MIN_LOG_PROB)){
82
+ set(MIN_LOG_PROB);
83
+ }else{
84
+ // Correct underflow if result is greater than either addend...
85
+ int k=toInt()+i.toInt(); set((toInt()<=0 && i.toInt()<=0 && (k>i.toInt() || k>toInt())) ? MIN_LOG_PROB : k);
86
+ }
87
+ return *this;
88
+ }
89
+ LogProb& operator-= ( const LogProb i ) { assert(false); return *this; }
90
+ LogProb& operator/= ( const LogProb i )
91
+ { if((toInt() == MIN_LOG_PROB) || (i.toInt() == MIN_LOG_PROB)){
92
+ set(MIN_LOG_PROB);
93
+ }else{
94
+ int k=toInt()-i.toInt(); set(k);
95
+ }
96
+ return *this;
97
+ }
98
+
99
+ // Extraction methods...
100
+ bool operator==( const LogProb i ) const { return(i.toInt()==toInt()); }
101
+ bool operator!=( const LogProb i ) const { return(i.toInt()!=toInt()); }
102
+ LogProb operator+ ( const LogProb i ) const { assert(false); return *this; } // no support for addition in log mode!
103
+ LogProb operator- ( const LogProb i ) const { assert(false); return *this; } // no support for addition in log mode!
104
+ LogProb operator* ( const LogProb i ) const {
105
+ int k;
106
+ if((toInt() == MIN_LOG_PROB) || (i.toInt() == MIN_LOG_PROB)){
107
+ k = MIN_LOG_PROB;
108
+ }else{
109
+ k=toInt()+i.toInt();
110
+ // Correct underflow if result is greater than either addend...
111
+ k = (toInt()<0 && i.toInt()<0 && (k>i.toInt() || k>toInt())) ? MIN_LOG_PROB : k;
112
+ }
113
+ return LogProb(k);
114
+ }
115
+ LogProb operator/ ( const LogProb i ) const {
116
+ int k;
117
+ if((toInt() == MIN_LOG_PROB) || (i.toInt() == MIN_LOG_PROB)){
118
+ k = MIN_LOG_PROB;
119
+ }else{
120
+ k = toInt()-i.toInt();
121
+ // // Correct underflow if result is greater than either addend...
122
+ // k = (toInt()<0 && -i.toInt()<0 && (k>-i.toInt() || k>toInt())) ? MIN_LOG_PROB : k;
123
+ }
124
+ return LogProb(k);
125
+ }
126
+ Prob toProb() const { return exp(double(toInt())/100.0); }
127
+ double toDouble() const { return toProb().toDouble(); }
128
+ // operator double() const { return exp(toInt()/100.0); }
129
+ friend ostream& operator<< ( ostream& os, const LogProb& lp ) { return os<<lp.toInt(); }
130
+ friend String& operator<< ( String& str, const LogProb& lp ) { return str<<lp.toInt(); }
131
+ friend pair<StringInput,LogProb*> operator>> ( StringInput si, LogProb& n ) { return pair<StringInput,LogProb*>(si,&n); }
132
+ friend StringInput operator>> ( pair<StringInput,LogProb*> si_n, const char* psDlm ) {
133
+ double d=0.0; StringInput si=si_n.first>>d>>psDlm; *si_n.second=LogProb(d); return si; }
134
+ };
135
+
136
+ #endif /* _NL_PROB__ */
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-racpt.h ADDED
@@ -0,0 +1,332 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ///////////////////////////////////////////////////////////////////////////////
2
+ // //
3
+ // This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
4
+ // //
5
+ // ModelBlocks is free software: you can redistribute it and/or modify //
6
+ // it under the terms of the GNU General Public License as published by //
7
+ // the Free Software Foundation, either version 3 of the License, or //
8
+ // (at your option) any later version. //
9
+ // //
10
+ // ModelBlocks is distributed in the hope that it will be useful, //
11
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of //
12
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
13
+ // GNU General Public License for more details. //
14
+ // //
15
+ // You should have received a copy of the GNU General Public License //
16
+ // along with ModelBlocks. If not, see <http://www.gnu.org/licenses/>. //
17
+ // //
18
+ // ModelBlocks developers designate this particular file as subject to //
19
+ // the "Moses" exception as provided by ModelBlocks developers in //
20
+ // the LICENSE file that accompanies this code. //
21
+ // //
22
+ ///////////////////////////////////////////////////////////////////////////////
23
+
24
+
25
+ #ifndef _NL_RACPT__
26
+ #define _NL_RACPT__
27
+
28
+
29
+ template<class K, class P>
30
+ class GenericRACPTModel : public SimpleHash<K,P> {
31
+ private:
32
+ typedef SimpleHash<K,P> HKP;
33
+ // typedef typename SimpleHash<Y,P>::const_iterator IYP;
34
+ //HKYP h;
35
+
36
+ public:
37
+ //typedef Y RVType;
38
+ //typedef BaseIterVal<std::pair<IYP,IYP>,Y> IterVal;
39
+ //typedef typename HKYP::const_iterator const_key_iterator;
40
+
41
+ bool contains ( const K& k ) const {
42
+ return ( SimpleHash<K,P>::contains(k) );
43
+ }
44
+
45
+ /*
46
+ P getProb ( const IterVal& ikyp, const K& k ) const {
47
+ if ( ikyp.iter.first == ikyp.iter.second ) { cerr<<"ERROR: no iterator to fix probability: "<<k<<endl; return P(); }
48
+ return ( ikyp.iter.first->second );
49
+ }
50
+ */
51
+
52
+ P getProb ( const K& k ) const {
53
+ return SimpleHash<K,P>::get(k);
54
+ }
55
+ P& setProb ( const K& k ) {
56
+ return SimpleHash<K,P>::set(k);
57
+ }
58
+
59
+ /*
60
+ void normalize ( ) {
61
+ for ( typename HKYP::const_iterator ik=HKYP::begin(); ik!=HKYP::end(); ik++ ) {
62
+ K k=ik->first;
63
+ P p=P();
64
+ IterVal y;
65
+ for(bool by=setFirst(y,k); by; by=setNext(y,k))
66
+ p+=getProb(y,k);
67
+ if (p!=P())
68
+ for(bool by=setFirst(y,k); by; by=setNext(y,k))
69
+ setProb(y,k)/=p;
70
+ }
71
+ }
72
+ */
73
+ /*
74
+ void transmit ( int tSockfd, const char* psId ) const {
75
+ for ( typename HKYP::const_iterator ik=HKYP::begin(); ik!=HKYP::end(); ik++ ) {
76
+ K k=ik->first;
77
+ IterVal y;
78
+ // For each non-zero probability in model...
79
+ for ( bool b=setFirst(y,k); b; b=setNext(y,k) ) {
80
+ //if ( getProb(y,k) != P() ) {
81
+ String str(1000);
82
+ str<<psId<<" "<<k<<" : "<<y<<" = "<<getProb(y,k).toDouble()<<"\n";
83
+ if ( send(tSockfd,str.c_array(),str.size()-1,MSG_EOR) != int(str.size()-1) )
84
+ {cerr<<"ERROR writing to socket\n";exit(0);}
85
+ //}
86
+ }
87
+ }
88
+ }
89
+ */
90
+ void dump ( ostream& os, const char* psId ) const {
91
+ for ( typename HKP::const_iterator ik=HKP::begin(); ik!=HKP::end(); ik++ ) {
92
+ K k=ik->first;
93
+ os << psId<<" "<<k<<" = "<<getProb(k).toDouble()<<endl;
94
+
95
+ // IterVal y;
96
+ // for ( bool b=setFirst(y,k); b; b=setNext(y,k) )
97
+ // os<<psId<<" "<<k<<" : "<<y<<" = "<<getProb(y,k).toDouble()<<"\n";
98
+ }
99
+ }
100
+ void subsume ( GenericRACPTModel<K,P>& m ) {
101
+ for ( typename HKP::const_iterator ik=m.HKP::begin(); ik!=m.HKP::end(); ik++ ) {
102
+ K k=ik->first;
103
+ setProb(k) = m.getProb(k);
104
+ // IterVal y;
105
+ // for ( bool b=m.setFirst(y,k); b; b=m.setNext(y,k) )
106
+ // setProb(y,k) = m.getProb(y,k);
107
+ }
108
+ }
109
+ void clear ( ) { SimpleHash<K,P>::clear(); }
110
+
111
+ friend pair<StringInput,GenericRACPTModel<K,P>*> operator>> ( StringInput si, GenericRACPTModel<K,P>& m ) {
112
+ return pair<StringInput,GenericRACPTModel<K,P>*>(si,&m); }
113
+
114
+ friend StringInput operator>> ( pair<StringInput,GenericRACPTModel<K,P>*> delimbuff, const char* psD ) {
115
+ K k;
116
+ StringInput si,si2,si3;
117
+ GenericRACPTModel<K,P>& m = *delimbuff.second;
118
+ si=delimbuff.first;
119
+ if ( si==NULL ) return si;
120
+
121
+ // Kill the colon since we're treating the whole thing as the condition
122
+ char * str = si.c_str();
123
+ char * p = strchr(str, ':');
124
+ if(p){
125
+ p[0] = ' ';
126
+ }
127
+ si=str;
128
+ while((si2=si>>" ")!=NULL)si=si2;
129
+ si=si>>k>>" ";
130
+ while((si2=si>>" ")!=NULL)si=si2;
131
+ si=si>>"= ";
132
+ while((si2=si>>" ")!=NULL)si=si2;
133
+ return (si!=NULL) ? si>>m.setProb(k)>>psD : si;
134
+ }
135
+ };
136
+
137
+
138
+ template<class Y, class P>
139
+ class RandAccCPT1DModel : public GenericRACPTModel<MapKey1D<Y>,P> {
140
+ public:
141
+ // typedef typename GenericCPTModel<Y,MapKey1D<Unit>,P>::IterVal IterVal;
142
+
143
+ bool contains ( const Y& y ) const {
144
+ return GenericRACPTModel<MapKey1D<Y>,P>::contains ( MapKey1D<Y>(y) );
145
+ }
146
+ /*
147
+ P getProb ( const IterVal& ixyp ) const {
148
+ return GenericCPTModel<MapKey1D<Y>,P>::getProb ( ixyp, MapKey1D<Y>(Y()) );
149
+ }
150
+ */
151
+ P getProb ( const Y& y ) const {
152
+ return GenericRACPTModel<MapKey1D<Y>,P>::getProb ( MapKey1D<Y>(y) );
153
+ }
154
+ /*
155
+ P& setProb ( const Y& y ) {
156
+ cerr << "setProb called on racpt1d" << endl;
157
+ return GenericRACPTModel<MapKey1D<Y>,P>::setProb ( MapKey1D<Y>(y) );
158
+ }
159
+ */
160
+ /*
161
+ bool readFields ( Array<char*>& aps ) {
162
+ if ( 3==aps.size() ) {
163
+ GenericRACPTModel<MapKey1D<Y>,P>::setProb ( MapKey1D<Y>(aps[1]) ) = atof(aps[2]);
164
+ return true;
165
+ }
166
+ return false;
167
+ }
168
+ */
169
+ };
170
+
171
+
172
+ ////////////////////
173
+ template<class Y, class X1, class P>
174
+ class RandAccCPT2DModel : public GenericRACPTModel<MapKey2D<X1,Y>,P> {
175
+ public:
176
+
177
+ // This stuff only for deterministic 'Determ' models...
178
+ // typedef X1 Dep1Type;
179
+ // typedef P ProbType;
180
+ // MapKey1D<X1> condKey;
181
+
182
+ bool contains ( const Y& y, const X1& x1 ) const {
183
+ // MapKey2D<X1,Y> temp = MapKey2D<X1,Y>(x1,y);
184
+ return GenericRACPTModel<MapKey2D<X1,Y>,P>::contains ( MapKey2D<X1,Y>(x1,y) );
185
+ }
186
+
187
+ P getProb ( const Y& y, const X1& x1 ) const {
188
+ return GenericRACPTModel<MapKey2D<X1,Y>,P>::getProb ( MapKey2D<X1,Y>(x1,y) );
189
+ }
190
+
191
+ /*
192
+ P& setProb ( const Y& y, const X1& x1 ) {
193
+ cerr << "setProb called on racpt2d" << endl;
194
+ return GenericRACPTModel<MapKey2D<Y,X1>,P>::setProb ( MapKey2D<Y,X1>(y,x1) );
195
+ }
196
+ */
197
+
198
+ };
199
+
200
+
201
+ ////////////////////
202
+ template<class Y, class X1, class X2, class P>
203
+ class RandAccCPT3DModel : public GenericRACPTModel<MapKey3D<X1,X2,Y>,P> {
204
+ public:
205
+
206
+ bool contains ( const Y& y, const X1& x1, const X2& x2 ) const {
207
+ return GenericRACPTModel<MapKey3D<X1,X2,Y>,P>::contains ( MapKey3D<X1,X2,Y>(x1,x2,y) );
208
+ }
209
+
210
+ P getProb ( const Y& y, const X1& x1, const X2& x2 ) const {
211
+ return GenericRACPTModel<MapKey3D<X1,X2,Y>,P>::getProb ( MapKey3D<X1,X2,Y>(x1,x2,y) );
212
+ }
213
+ /*
214
+ P& setProb ( const Y& y, const X1& x1, const X2& x2 ) {
215
+ return GenericRACPTModel<MapKey3D<X1,X2,Y>,P>::setProb ( MapKey3D<Y,X1,X2>(x1,x2,y) );
216
+ }
217
+ */
218
+ };
219
+
220
+ /*
221
+ ////////////////////
222
+ template<class Y, class X1, class X2, class X3, class P>
223
+ class CPT4DModel : public GenericCPTModel<Y,MapKey3D<X1,X2,X3>,P> {
224
+ public:
225
+ typedef typename GenericCPTModel<Y,MapKey3D<X1,X2,X3>,P>::IterVal IterVal;
226
+
227
+ bool setFirst ( IterVal& ixyp, const X1& x1, const X2& x2, const X3& x3 ) const {
228
+ return GenericCPTModel<Y,MapKey3D<X1,X2,X3>,P>::setFirst ( ixyp, MapKey3D<X1,X2,X3>(x1,x2,x3) );
229
+ }
230
+ bool setNext ( IterVal& ixyp, const X1& x1, const X2& x2, const X3& x3 ) const {
231
+ return GenericCPTModel<Y,MapKey3D<X1,X2,X3>,P>::setNext ( ixyp, MapKey3D<X1,X2,X3>(x1,x2,x3) );
232
+ }
233
+ bool contains ( const Y& y, const X1& x1, const X2& x2, const X3& x3 ) const {
234
+ return GenericCPTModel<Y,MapKey3D<X1,X2,X3>,P>::contains ( y, MapKey3D<X1,X2,X3>(x1,x2,x3) );
235
+ }
236
+ bool contains ( const X1& x1, const X2& x2, const X3& x3 ) const {
237
+ return GenericCPTModel<Y,MapKey3D<X1,X2,X3>,P>::contains ( MapKey3D<X1,X2,X3>(x1,x2,x3) );
238
+ }
239
+ P getProb ( const IterVal& ixyp, const X1& x1, const X2& x2, const X3& x3 ) const {
240
+ return GenericCPTModel<Y,MapKey3D<X1,X2,X3>,P>::getProb ( ixyp, MapKey3D<X1,X2,X3>(x1,x2,x3) );
241
+ }
242
+ P getProb ( const Y& y, const X1& x1, const X2& x2, const X3& x3 ) const {
243
+ return GenericCPTModel<Y,MapKey3D<X1,X2,X3>,P>::getProb ( y, MapKey3D<X1,X2,X3>(x1,x2,x3) );
244
+ }
245
+ P& setProb ( const Y& y, const X1& x1, const X2& x2, const X3& x3 ) {
246
+ return GenericCPTModel<Y,MapKey3D<X1,X2,X3>,P>::setProb ( y, MapKey3D<X1,X2,X3>(x1,x2,x3) );
247
+ }
248
+ bool readFields ( Array<char*>& aps ) {
249
+ if ( 6==aps.size() ) {
250
+ GenericCPTModel<Y,MapKey3D<X1,X2,X3>,P>::setProb ( Y(aps[4]), MapKey3D<X1,X2,X3>(aps[1],aps[2],aps[3]) ) = atof(aps[5]);
251
+ return true;
252
+ }
253
+ return false;
254
+ }
255
+ };
256
+
257
+
258
+ ////////////////////
259
+ template<class Y, class X1, class X2, class X3, class X4, class P>
260
+ class CPT5DModel : public GenericCPTModel<Y,MapKey4D<X1,X2,X3,X4>,P> {
261
+ public:
262
+ typedef typename GenericCPTModel<Y,MapKey4D<X1,X2,X3,X4>,P>::IterVal IterVal;
263
+
264
+ bool setFirst ( IterVal& ixyp, const X1& x1, const X2& x2, const X3& x3, const X4& x4 ) const {
265
+ return GenericCPTModel<Y,MapKey4D<X1,X2,X3,X4>,P>::setFirst ( ixyp, MapKey4D<X1,X2,X3,X4>(x1,x2,x3,x4) );
266
+ }
267
+ bool setNext ( IterVal& ixyp, const X1& x1, const X2& x2, const X3& x3, const X4& x4 ) const {
268
+ return GenericCPTModel<Y,MapKey4D<X1,X2,X3,X4>,P>::setNext ( ixyp, MapKey4D<X1,X2,X3,X4>(x1,x2,x3,x4) );
269
+ }
270
+ bool contains ( const Y& y, const X1& x1, const X2& x2, const X3& x3, const X4& x4 ) const {
271
+ return GenericCPTModel<Y,MapKey4D<X1,X2,X3,X4>,P>::contains ( y, MapKey4D<X1,X2,X3,X4>(x1,x2,x3,x4) );
272
+ }
273
+ bool contains ( const X1& x1, const X2& x2, const X3& x3, const X4& x4 ) const {
274
+ return GenericCPTModel<Y,MapKey4D<X1,X2,X3,X4>,P>::contains ( MapKey4D<X1,X2,X3,X4>(x1,x2,x3,x4) );
275
+ }
276
+ P getProb ( const IterVal& ixyp, const X1& x1, const X2& x2, const X3& x3, const X4& x4 ) const {
277
+ return GenericCPTModel<Y,MapKey4D<X1,X2,X3,X4>,P>::getProb ( ixyp, MapKey4D<X1,X2,X3,X4>(x1,x2,x3,x4) );
278
+ }
279
+ P getProb ( const Y& y, const X1& x1, const X2& x2, const X3& x3, const X4& x4 ) const {
280
+ return GenericCPTModel<Y,MapKey4D<X1,X2,X3,X4>,P>::getProb ( y, MapKey4D<X1,X2,X3,X4>(x1,x2,x3,x4) );
281
+ }
282
+ P& setProb ( const Y& y, const X1& x1, const X2& x2, const X3& x3, const X4& x4 ) {
283
+ return GenericCPTModel<Y,MapKey4D<X1,X2,X3,X4>,P>::setProb ( y, MapKey4D<X1,X2,X3,X4>(x1,x2,x3,x4) );
284
+ }
285
+ bool readFields ( Array<char*>& aps ) {
286
+ if ( 7==aps.size() ) {
287
+ GenericCPTModel<Y,MapKey4D<X1,X2,X3,X4>,P>::setProb ( Y(aps[5]), MapKey4D<X1,X2,X3,X4>(aps[1],aps[2],aps[3],aps[4]) ) = atof(aps[6]);
288
+ return true;
289
+ }
290
+ return false;
291
+ }
292
+ };
293
+
294
+
295
+ ////////////////////
296
+ template<class Y, class X1, class X2, class X3, class X4, class X5, class P>
297
+ class RACPT6DModel : public GenericCPTModel<Y,MapKey5D<X1,X2,X3,X4,X5>,P> {
298
+ public:
299
+ typedef typename GenericCPTModel<Y,MapKey5D<X1,X2,X3,X4,X5>,P>::IterVal IterVal;
300
+
301
+ bool setFirst ( IterVal& ixyp, const X1& x1, const X2& x2, const X3& x3, const X4& x4, const X5& x5 ) const {
302
+ return GenericCPTModel<Y,MapKey5D<X1,X2,X3,X4,X5>,P>::setFirst ( ixyp, MapKey5D<X1,X2,X3,X4,X5>(x1,x2,x3,x4,x5) );
303
+ }
304
+ bool setNext ( IterVal& ixyp, const X1& x1, const X2& x2, const X3& x3, const X4& x4, const X5& x5 ) const {
305
+ return GenericCPTModel<Y,MapKey5D<X1,X2,X3,X4,X5>,P>::setNext ( ixyp, MapKey5D<X1,X2,X3,X4,X5>(x1,x2,x3,x4,x5) );
306
+ }
307
+ bool contains ( const Y& y, const X1& x1, const X2& x2, const X3& x3, const X4& x4, const X5& x5 ) const {
308
+ return GenericCPTModel<Y,MapKey5D<X1,X2,X3,X4,X5>,P>::contains ( y, MapKey5D<X1,X2,X3,X4,X5>(x1,x2,x3,x4,x5) );
309
+ }
310
+ bool contains ( const X1& x1, const X2& x2, const X3& x3, const X4& x4, const X5& x5 ) const {
311
+ return GenericCPTModel<Y,MapKey5D<X1,X2,X3,X4,X5>,P>::contains ( MapKey5D<X1,X2,X3,X4,X5>(x1,x2,x3,x4,x5) );
312
+ }
313
+ P getProb ( const IterVal& ixyp, const X1& x1, const X2& x2, const X3& x3, const X4& x4, const X5& x5 ) const {
314
+ return GenericCPTModel<Y,MapKey5D<X1,X2,X3,X4,X5>,P>::getProb ( ixyp, MapKey5D<X1,X2,X3,X4,X5>(x1,x2,x3,x4,x5) );
315
+ }
316
+ P getProb ( const Y& y, const X1& x1, const X2& x2, const X3& x3, const X4& x4, const X5& x5 ) const {
317
+ return GenericCPTModel<Y,MapKey5D<X1,X2,X3,X4,X5>,P>::getProb ( y, MapKey5D<X1,X2,X3,X4,X5>(x1,x2,x3,x4,x5) );
318
+ }
319
+ P& setProb ( const Y& y, const X1& x1, const X2& x2, const X3& x3, const X4& x4, const X5& x5 ) {
320
+ return GenericCPTModel<Y,MapKey5D<X1,X2,X3,X4,X5>,P>::setProb ( y, MapKey5D<X1,X2,X3,X4,X5>(x1,x2,x3,x4,x5) );
321
+ }
322
+ bool readFields ( Array<char*>& aps ) {
323
+ if ( 8==aps.size() ) {
324
+ GenericCPTModel<Y,MapKey5D<X1,X2,X3,X4,X5>,P>::setProb ( Y(aps[6]), MapKey5D<X1,X2,X3,X4,X5>(aps[1],aps[2],aps[3],aps[4],aps[5]) ) = atof(aps[7]);
325
+ return true;
326
+ }
327
+ return false;
328
+ }
329
+ };
330
+
331
+ */
332
+ #endif //_NL_RACPT__
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-randvar.h ADDED
@@ -0,0 +1,593 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ///////////////////////////////////////////////////////////////////////////////
2
+ // //
3
+ // This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
4
+ // //
5
+ // ModelBlocks is free software: you can redistribute it and/or modify //
6
+ // it under the terms of the GNU General Public License as published by //
7
+ // the Free Software Foundation, either version 3 of the License, or //
8
+ // (at your option) any later version. //
9
+ // //
10
+ // ModelBlocks is distributed in the hope that it will be useful, //
11
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of //
12
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
13
+ // GNU General Public License for more details. //
14
+ // //
15
+ // You should have received a copy of the GNU General Public License //
16
+ // along with ModelBlocks. If not, see <http://www.gnu.org/licenses/>. //
17
+ // //
18
+ // ModelBlocks developers designate this particular file as subject to //
19
+ // the "Moses" exception as provided by ModelBlocks developers in //
20
+ // the LICENSE file that accompanies this code. //
21
+ // //
22
+ ///////////////////////////////////////////////////////////////////////////////
23
+
24
+ #ifndef _NL_RAND_VAR__
25
+ #define _NL_RAND_VAR__
26
+
27
+ #include <math.h>
28
+ #include <string>
29
+ #include "nl-string.h"
30
+ #include "nl-safeids.h"
31
+ #include "nl-stringindex.h"
32
+ #include "nl-prob.h"
33
+ #include "nl-hash.h"
34
+
35
+ ////////////////////////////////////////////////////////////////////////////////
36
+
37
/// Plain aggregate of three values, in the spirit of std::pair with a third member.
/// Members are public and named first / second / third.
template <class A, class B, class C>
class trip {
 public:
  A first;
  B second;
  C third;

  /// Default constructor: each member is default-initialized.
  trip ( ) { }

  /// Copies a, b and c into first, second and third respectively.
  /// The parameters are const references so that temporaries and const
  /// objects can be passed as well as ordinary lvalues.
  trip ( const A& a, const B& b, const C& c ) : first(a), second(b), third(c) { }

  /// Writes the three members separated by commas: "first,second,third".
  friend std::ostream& operator<< ( std::ostream& os, const trip<A,B,C>& a ) {
    return os << a.first << "," << a.second << "," << a.third;
  }
};
47
+
48
/// Plain aggregate of four values, in the spirit of std::pair with two extra members.
/// Members are public and named first / second / third / fourth.
template <class A, class B, class C, class D>
class quad {
 public:
  A first;
  B second;
  C third;
  D fourth;

  /// Default constructor: each member is default-initialized.
  quad ( ) { }

  /// Copies a, b, c and d into first, second, third and fourth respectively.
  /// The parameters are const references so that temporaries and const
  /// objects can be passed as well as ordinary lvalues.
  quad ( const A& a, const B& b, const C& c, const D& d ) : first(a), second(b), third(c), fourth(d) { }

  /// Writes the four members separated by commas: "first,second,third,fourth".
  friend std::ostream& operator<< ( std::ostream& os, const quad<A,B,C,D>& a ) {
    return os << a.first << "," << a.second << "," << a.third << "," << a.fourth;
  }
};
59
+
60
+ ///////////////////////////////////////////////////////////////////////////////
61
+ ///////////////////////////////////////////////////////////////////////////////
62
+ //
63
+ // DiscreteDomainRV template -- creates RV with a distinct set of values for domain T (unique class)
64
+ //
65
+ ////////////////////////////////////////////////////////////////////////////////
66
+
67
+ ////////////////////////////////////////////////////////////
68
+ template <class T>
69
+ class DiscreteDomain : public StringIndex {
70
+ public:
71
+ typedef T ValType;
72
+ int MAX_SIZE ;
73
+ DiscreteDomain ( ) : StringIndex() { }
74
+ DiscreteDomain ( int i ) : StringIndex() { MAX_SIZE=i; }
75
+ int addIndex ( const char* ps ) { int i=StringIndex::addIndex(ps); assert(i==T(i)); return i; }
76
+ };
77
+
78
+ ////////////////////////////////////////////////////////////
79
+ template <class T, DiscreteDomain<T>& domain>
80
+ class DiscreteDomainRV : public Id<T> {
81
+ private:
82
+
83
+ static String strTemp;
84
+
85
+ public:
86
+
87
+ typedef DiscreteDomainRV<T,domain> BaseType;
88
+
89
+ static const int NUM_VARS = 1;
90
+
91
+ ////////////////////
92
+ template<class P>
93
+ class ArrayDistrib : public Array<pair<DiscreteDomainRV<T,domain>,P> > {
94
+ };
95
+
96
+ ////////////////////
97
+ template<class P>
98
+ class ArrayIterator : public pair<SafePtr<const ArrayDistrib<P> >,Id<int> > {
99
+ public:
100
+ static const int NUM_ITERS = NUM_VARS;
101
+ operator DiscreteDomainRV<T,domain>() const { return ArrayIterator<P>::first.getRef().get(ArrayIterator<P>::second.toInt()).first; }
102
+ //const DiscreteDomainRV<T,domain>& toRV() { return ArrayIterator<P>::first.getRef().get(ArrayIterator<P>::second.toInt()).first; }
103
+ bool end ( ) const { return ( ArrayIterator<P>::second >= ArrayIterator<P>::first.getRef().getSize() ); }
104
+ ArrayIterator<P>& operator++ ( ) { ++ArrayIterator<P>::second; return *this; }
105
+ };
106
+
107
+ // Static extraction methods...
108
+ static const DiscreteDomain<T>& getDomain ( ) { return domain; }
109
+
110
+ // Constructor / destructor methods...
111
+ DiscreteDomainRV ( ) { Id<T>::set(0); }
112
+ DiscreteDomainRV ( int i ) { Id<T>::set(i); }
113
+ DiscreteDomainRV ( const char* ps ) { assert(ps!=NULL); Id<T>::set(domain.addIndex(ps)); }
114
+
115
+ // Specification methods...
116
+ template<class P>
117
+ DiscreteDomainRV<T,domain>& setVal ( const ArrayIterator<P>& it ) { *this=it; return *this; }
118
+ bool setFirst ( ) { Id<T>::set(0); return isValid(); }
119
+ bool setNext ( ) { Id<T>::setNext(); if (!isValid()){Id<T>::set(0); return false;} return true; }
120
+
121
+ // Extraction methods...
122
+ bool isValid ( ) const { return *this<domain.getSize(); } //return (this->Id<T>::operator<(domain.getSize())); }
123
+ int getIndex ( ) const { return Id<T>::toInt(); } // DO NOT DELETE THIS METHOD!!!!!!!!!!
124
+ string getString ( ) const { return domain.getString(Id<T>::toInt()); }
125
+
126
+ // Input / output methods...
127
+ friend ostream& operator<< ( ostream& os, const DiscreteDomainRV<T,domain>& rv ) { return os<<rv.getString(); }
128
+ friend String& operator<< ( String& str, const DiscreteDomainRV<T,domain>& rv ) { return str<<rv.getString(); }
129
+ friend pair<StringInput,DiscreteDomainRV<T,domain>*> operator>> ( const StringInput ps, DiscreteDomainRV<T,domain>& rv ) { return pair<StringInput,DiscreteDomainRV<T,domain>*>(ps,&rv); }
130
+ friend StringInput operator>> ( pair<StringInput,DiscreteDomainRV<T,domain>*> delimbuff, const char* psDlm ) {
131
+ if (StringInput(NULL)==delimbuff.first) return delimbuff.first;
132
+ ////assert(*delimbuff.second<domain.getSize());
133
+ int j=0;
134
+ StringInput psIn = delimbuff.first;
135
+ if(psDlm[0]=='\0') { *delimbuff.second=psIn.c_str(); return psIn+strlen(psIn.c_str()); }
136
+ for(int i=0;psIn[i]!='\0';i++) {
137
+ if(psIn[i]==psDlm[j]) j++;
138
+ else j=0;
139
+ strTemp[i]=psIn[i];
140
+ if(j==int(strlen(psDlm))) { strTemp[i+1-j]='\0'; /*delimbuff.second->set(domain.addIndex(psIn.c_str()));*/ *delimbuff.second=strTemp.c_array(); return psIn+i+1;}
141
+ }
142
+ return NULL; //psIn;
143
+ }
144
+ };
145
+ template <class T, DiscreteDomain<T>& domain>
146
+ String DiscreteDomainRV<T,domain>::strTemp ( 100 );
147
+
148
+
149
+ /* DON'T COMMENT BACK IN!!! THIS HAS BEEN MOVED TO nl-refrv.h!!!!!!
150
+ ////////////////////////////////////////////////////////////
151
+ template <class T>
152
+ class RefRV : public Id<const T*> {
153
+ public:
154
+
155
+ typedef RefRV<T> BaseType;
156
+
157
+ static const int NUM_VARS = 1;
158
+ static const T DUMMY;
159
+
160
+ ////////////////////
161
+ template<class P>
162
+ class ArrayDistrib : public Array<pair<RefRV<T>,P> > {
163
+ };
164
+
165
+ ////////////////////
166
+ template<class P>
167
+ class ArrayIterator : public pair<SafePtr<const ArrayDistrib<P> >,Id<int> > {
168
+ public:
169
+ static const int NUM_ITERS = NUM_VARS;
170
+ operator RefRV<T>() const { return ArrayIterator<P>::first.getRef().get(ArrayIterator<P>::second.toInt()).first; }
171
+ //const DiscreteDomainRV<T,domain>& toRV() { return ArrayIterator<P>::first.getRef().get(ArrayIterator<P>::second.toInt()).first; }
172
+ };
173
+
174
+ // Constructor / destructor methods...
175
+ RefRV ( ) { Id<const T*>::set(NULL); }
176
+ RefRV ( const T& t ) { Id<const T*>::set(&t); }
177
+
178
+ // Specification methods...
179
+ template<class P>
180
+ RefRV<T>& setVal ( const ArrayIterator<P>& it ) { *this=it; return *this; }
181
+
182
+ // Extraction methods...
183
+ const T& getRef ( ) const { return (Id<const T*>::toInt()==NULL) ? DUMMY : *(static_cast<const T*>(Id<const T*>::toInt())); }
184
+
185
+ // Input / output methods..
186
+ friend ostream& operator<< ( ostream& os, const RefRV<T>& rv ) { return os <<&rv.getRef(); } //{ return os<<rv.getRef(); }
187
+ friend String& operator<< ( String& str, const RefRV<T>& rv ) { return str<<"addr"<<(long int)(void*)&rv.getRef(); } //{ return str<<rv.getRef(); }
188
+ friend pair<StringInput,RefRV<T>*> operator>> ( const StringInput ps, RefRV<T>& rv ) { return pair<StringInput,RefRV<T>*>(ps,&rv); }
189
+ friend StringInput operator>> ( pair<StringInput,RefRV<T>*> delimbuff, const char* psDlm ) {
190
+ if (StringInput(NULL)==delimbuff.first) return delimbuff.first;
191
+ return NULL; //psIn;
192
+ }
193
+ };
194
+ template <class T> const T RefRV<T>::DUMMY;
195
+ */
196
+
197
+
198
+ ///////////////////////////////////////////////////////////////////////////////
199
+ ///////////////////////////////////////////////////////////////////////////////
200
+ //
201
+ // Joint2DRV
202
+ //
203
+ ////////////////////////////////////////////////////////////////////////////////
204
+
205
+ ////////////////////////////////////////////////////////////
206
+ template<class V1,class V2>
207
+ class Joint2DRV {
208
+
209
+ public:
210
+
211
+ V1 first;
212
+ V2 second;
213
+
214
+ // Constructor / destructor methods...
215
+ Joint2DRV ( ) { }
216
+ Joint2DRV ( const V1& v1, const V2& v2 ) { first=v1; second=v2; }
217
+
218
+ // Extraction methods...
219
+ size_t getHashKey ( ) const { size_t k=rotLeft(first.getHashKey(),3); k^=second.getHashKey();
220
+ /*fprintf(stderr," (%d) %d ^& %d = %d\n",sizeof(*this),x1.getHashKey(),x2.getHashKey(),k);*/ return k; }
221
+ bool operator< ( const Joint2DRV<V1,V2>& j ) const { return ( (first<j.first) ||
222
+ (first==j.first && second<j.second) ); }
223
+ bool operator== ( const Joint2DRV<V1,V2>& j ) const { return ( first==j.first && second==j.second ); }
224
+ bool operator!= ( const Joint2DRV<V1,V2>& j ) const { return ( !(first==j.first && second==j.second) ); }
225
+ };
226
+
227
+
228
+ ////////////////////////////////////////////////////////////
229
+ template<char* SD1,class V1,char* SD2,class V2,char* SD3>
230
+ class DelimitedJoint2DRV : public Joint2DRV<V1,V2> {
231
+
232
+ public:
233
+
234
+ static const int NUM_VARS = V1::NUM_VARS + V2::NUM_VARS;
235
+
236
+ ////////////////////
237
+ template<class P>
238
+ class ArrayIterator : public pair<typename V1::template ArrayIterator<P>, typename V2::template ArrayIterator<P> > {
239
+ public:
240
+ // static const int NUM_ITERS = (typename V1::template ArrayIterator<P>)::NUM_ITERS + (typename V2::template ArrayIterator<P>)::NUM_ITERS;
241
+ static const int NUM_ITERS = NUM_VARS;
242
+ // DelimitedJoint2DRV<SD1,V1,SD2,V2,SD3>& set ( DelimitedJoint2DRV<SD1,V1,SD2,V2,SD3>& rv ) const { first.set(rv.first=first); rv.second=second; return rv; }
243
+ friend ostream& operator<< ( ostream& os, const ArrayIterator<P>& rv ) {
244
+ return os<<SD1<<rv.first<<SD2<<rv.second<<SD3; }
245
+ };
246
+
247
+ // Constructor / destructor methods...
248
+ DelimitedJoint2DRV ( ) : Joint2DRV<V1,V2>() { }
249
+ DelimitedJoint2DRV ( const V1& v1, const V2& v2 ) : Joint2DRV<V1,V2>(v1,v2) { }
250
+ DelimitedJoint2DRV ( char* ps ) : Joint2DRV<V1,V2>() { ps>>*this>>"\0"; }
251
+ DelimitedJoint2DRV ( const char* ps ) : Joint2DRV<V1,V2>() { strdup(ps)>>*this>>"\0"; } //DelimitedJoint2DRV<SD1,V1,SD2,V2,SD3>(strdup(ps)) { }
252
+
253
+ // Specification methods...
254
+ template<class P>
255
+ DelimitedJoint2DRV<SD1,V1,SD2,V2,SD3>& setVal ( const ArrayIterator<P>& it ) {
256
+ Joint2DRV<V1,V2>::first.setVal(it.first); Joint2DRV<V1,V2>::second.setVal(it.second); return *this; }
257
+
258
+ // Extraction methods...
259
+ bool operator==(const DelimitedJoint2DRV<SD1,V1,SD2,V2,SD3>& vv) const { return Joint2DRV<V1,V2>::operator==(vv); }
260
+ bool operator< (const DelimitedJoint2DRV<SD1,V1,SD2,V2,SD3>& vv) const { return Joint2DRV<V1,V2>::operator<(vv); }
261
+
262
+ // Input / output methods...
263
+ friend ostream& operator<< ( ostream& os, const DelimitedJoint2DRV<SD1,V1,SD2,V2,SD3>& rv ) { return os<<SD1<<rv.first<<SD2<<rv.second<<SD3; }
264
+ friend String& operator<< ( String& str, const DelimitedJoint2DRV<SD1,V1,SD2,V2,SD3>& rv ) { return str<<SD1<<rv.first<<SD2<<rv.second<<SD3; }
265
+ friend IStream operator>> ( pair<IStream,DelimitedJoint2DRV<SD1,V1,SD2,V2,SD3>*> is_x, const char* psDlm ) {
266
+ IStream& is = is_x.first;
267
+ DelimitedJoint2DRV<SD1,V1,SD2,V2,SD3>& x = *is_x.second;
268
+ // Propagate fail...
269
+ if ( IStream()==is ) return is;
270
+ // Use last delimiter only if not empty (otherwise it will immediately trivially match)...
271
+ return ( (SD3[0]=='\0') ? is>>SD1>>x.first>>SD2>>x.second>>psDlm
272
+ : is>>SD1>>x.first>>SD2>>x.second>>SD3>>psDlm );
273
+ }
274
+
275
+ // OBSOLETE!
276
+ friend pair<StringInput,DelimitedJoint2DRV<SD1,V1,SD2,V2,SD3>*> operator>> ( StringInput ps, DelimitedJoint2DRV<SD1,V1,SD2,V2,SD3>& rv ) { return pair<StringInput,DelimitedJoint2DRV<SD1,V1,SD2,V2,SD3>*>(ps,&rv); }
277
+ friend StringInput operator>> ( pair<StringInput,DelimitedJoint2DRV<SD1,V1,SD2,V2,SD3>*> delimbuff, const char* psDlm ) {
278
+ if (StringInput(NULL)==delimbuff.first) return delimbuff.first;
279
+ return ( (SD3[0]=='\0') ? delimbuff.first>>SD1>>delimbuff.second->first>>SD2>>delimbuff.second->second>>psDlm
280
+ : delimbuff.first>>SD1>>delimbuff.second->first>>SD2>>delimbuff.second->second>>SD3>>psDlm );
281
+ }
282
+ };
283
+
284
+
285
+
286
+ ///////////////////////////////////////////////////////////////////////////////
287
+ ///////////////////////////////////////////////////////////////////////////////
288
+ //
289
+ // Joint3DRV
290
+ //
291
+ ////////////////////////////////////////////////////////////////////////////////
292
+
293
+ template<class V1,class V2,class V3>
294
+ class Joint3DRV {
295
+
296
+ public:
297
+
298
+ V1 first;
299
+ V2 second;
300
+ V3 third;
301
+
302
+ // Constructor / destructor methods...
303
+ Joint3DRV ( ) { }
304
+ Joint3DRV ( const V1& v1, const V2& v2, const V3& v3 ) { first=v1; second=v2; third=v3; }
305
+
306
+ /*
307
+ // Specification methods...
308
+ bool operator< ( const Joint3DRV<V1,V2,V3>& j ) const {
309
+ return ( (x1<j.x1) ||
310
+ (x1==j.x1 && x2<j.x2) ||
311
+ (x1==j.x1 && x2==j.x2 && x3<j.x3) ) ;
312
+ }
313
+ */
314
+
315
+ // Extraction methods...
316
+ size_t getHashKey ( ) const { size_t k=rotLeft(first.getHashKey(),3); k^=second.getHashKey(); k=rotLeft(k,3); k^=third.getHashKey();
317
+ /*fprintf(stderr," (%d) %d ^& %d = %d\n",sizeof(*this),x1.getHashKey(),x2.getHashKey(),k);*/ return k; }
318
+ // bool operator< ( const Joint2DRV<V1,V2>& j ) const { return ( (first<j.first) ||
319
+ // (first==j.first && second<j.second) ); }
320
+ bool operator== ( const Joint3DRV<V1,V2,V3>& j ) const { return ( first==j.first && second==j.second && third==j.third ); }
321
+ bool operator!= ( const Joint3DRV<V1,V2,V3>& j ) const { return ( !(first==j.first && second==j.second && third==j.third) ); }
322
+ };
323
+
324
+ ////////////////////////////////////////////////////////////
325
+ template<char* SD1,class V1,char* SD2,class V2,char* SD3,class V3,char* SD4>
326
+ class DelimitedJoint3DRV : public Joint3DRV<V1,V2,V3> {
327
+
328
+ public:
329
+
330
+ static const int NUM_VARS = V1::NUM_VARS + V2::NUM_VARS + V3::NUM_VARS;
331
+
332
+ ////////////////////
333
+ template<class P>
334
+ class ArrayIterator : public trip<typename V1::template ArrayIterator<P>, typename V2::template ArrayIterator<P>, typename V3::template ArrayIterator<P> > {
335
+ public:
336
+ // static const int NUM_ITERS = (typename V1::template ArrayIterator<P>)::NUM_ITERS + (typename V2::template ArrayIterator<P>)::NUM_ITERS;
337
+ static const int NUM_ITERS = NUM_VARS;
338
+ friend ostream& operator<< ( ostream& os, const ArrayIterator<P>& rv ) {
339
+ return os<<SD1<<rv.first<<SD2<<rv.second<<SD3<<rv.third<<SD4; }
340
+ };
341
+
342
+ // Constructor / destructor methods...
343
+ DelimitedJoint3DRV ( ) : Joint3DRV<V1,V2,V3>() { }
344
+ DelimitedJoint3DRV ( const V1& v1, const V2& v2, const V3& v3 ) : Joint3DRV<V1,V2,V3>(v1,v2,v3) { }
345
+ DelimitedJoint3DRV ( char* ps ) : Joint3DRV<V1,V2,V3>() { ps>>*this>>"\0"; }
346
+ DelimitedJoint3DRV ( const char* ps ) : Joint3DRV<V1,V2,V3>() { strdup(ps)>>*this>>"\0"; }
347
+
348
+ // Specification methods...
349
+ template<class P>
350
+ DelimitedJoint3DRV<SD1,V1,SD2,V2,SD3,V3,SD4>& setVal ( const ArrayIterator<P>& it ) {
351
+ Joint3DRV<V1,V2,V3>::first.setVal(it.first); Joint3DRV<V1,V2,V3>::second.setVal(it.second); Joint3DRV<V1,V2,V3>::third.setVal(it.third); return *this; }
352
+
353
+ // Extraction methods...
354
+ bool operator==(const DelimitedJoint3DRV<SD1,V1,SD2,V2,SD3,V3,SD4>& vvv) const { return Joint3DRV<V1,V2,V3>::operator==(vvv); }
355
+ bool operator< (const DelimitedJoint3DRV<SD1,V1,SD2,V2,SD3,V3,SD4>& vvv) const { return Joint3DRV<V1,V2,V3>::operator< (vvv); }
356
+
357
+ // Input / output methods...
358
+ friend ostream& operator<< ( ostream& os, const DelimitedJoint3DRV<SD1,V1,SD2,V2,SD3,V3,SD4>& rv ) { return os<<SD1<<rv.first<<SD2<<rv.second<<SD3<<rv.third<<SD4; }
359
+ friend String& operator<< ( String& str, const DelimitedJoint3DRV<SD1,V1,SD2,V2,SD3,V3,SD4>& rv ) { return str<<SD1<<rv.first<<SD2<<rv.second<<SD3<<rv.third<<SD4; }
360
+ friend pair<StringInput,DelimitedJoint3DRV<SD1,V1,SD2,V2,SD3,V3,SD4>*> operator>> ( StringInput ps, DelimitedJoint3DRV<SD1,V1,SD2,V2,SD3,V3,SD4>& rv ) {
361
+ return pair<StringInput,DelimitedJoint3DRV<SD1,V1,SD2,V2,SD3,V3,SD4>*>(ps,&rv); }
362
+ friend StringInput operator>> ( pair<StringInput,DelimitedJoint3DRV<SD1,V1,SD2,V2,SD3,V3,SD4>*> delimbuff, const char* psDlm ) {
363
+ if (StringInput(NULL)==delimbuff.first) return delimbuff.first;
364
+ return ( (SD4[0]=='\0') ? delimbuff.first>>SD1>>delimbuff.second->first>>SD2>>delimbuff.second->second>>SD3>>delimbuff.second->third>>psDlm
365
+ : delimbuff.first>>SD1>>delimbuff.second->first>>SD2>>delimbuff.second->second>>SD3>>delimbuff.second->third>>SD4>>psDlm );
366
+ }
367
+ };
368
+
369
+ ///////////////////////////////////////////////////////////////////////////////
370
+ ///////////////////////////////////////////////////////////////////////////////
371
+ //
372
+ // Joint4DRV
373
+ //
374
+ ////////////////////////////////////////////////////////////////////////////////
375
+
376
+ template<class V1,class V2,class V3, class V4>
377
+ class Joint4DRV {
378
+
379
+ public:
380
+
381
+ V1 first;
382
+ V2 second;
383
+ V3 third;
384
+ V4 fourth;
385
+
386
+ // Constructor / destructor methods...
387
+ Joint4DRV ( ) { }
388
+ Joint4DRV ( const V1& v1, const V2& v2, const V3& v3, const V4& v4 ) { first=v1; second=v2; third=v3; fourth=v4;}
389
+ // Extraction methods...
390
+ size_t getHashKey ( ) const { size_t k=rotLeft(first.getHashKey(),3); k^=second.getHashKey(); k=rotLeft(k,3); k^=third.getHashKey();k^=fourth.getHashKey();
391
+ /*fprintf(stderr," (%d) %d ^& %d = %d\n",sizeof(*this),x1.getHashKey(),x2.getHashKey(),k);*/ return k; }
392
+ // bool operator< ( const Joint2DRV<V1,V2>& j ) const { return ( (first<j.first) ||
393
+ // (first==j.first && second<j.second) ); }
394
+ bool operator== ( const Joint4DRV<V1,V2,V3,V4>& j ) const { return ( first==j.first && second==j.second && third==j.third && fourth==j.fourth ); }
395
+ bool operator!= ( const Joint4DRV<V1,V2,V3,V4>& j ) const { return ( !(first==j.first && second==j.second && third==j.third && fourth==j.fourth) ); }
396
+ };
397
+
398
+ ////////////////////////////////////////////////////////////
399
+ template<char* SD1,class V1,char* SD2,class V2,char* SD3,class V3,char* SD4,class V4, char* SD5>
400
+ class DelimitedJoint4DRV : public Joint4DRV<V1,V2,V3,V4> {
401
+
402
+ public:
403
+
404
+ static const int NUM_VARS = V1::NUM_VARS + V2::NUM_VARS + V3::NUM_VARS+ V4::NUM_VARS;
405
+
406
+ ////////////////////
407
+ template<class P>
408
+ class ArrayIterator : public quad<typename V1::template ArrayIterator<P>, typename V2::template ArrayIterator<P>, typename V3::template ArrayIterator<P> , typename V4::template ArrayIterator<P> > {
409
+ public:
410
+ // static const int NUM_ITERS = (typename V1::template ArrayIterator<P>)::NUM_ITERS + (typename V2::template ArrayIterator<P>)::NUM_ITERS;
411
+ static const int NUM_ITERS = NUM_VARS;
412
+ friend ostream& operator<< ( ostream& os, const ArrayIterator<P>& rv ) {
413
+ return os<<SD1<<rv.first<<SD2<<rv.second<<SD3<<rv.third<<SD4<<rv.fourth<<SD5; }
414
+ };
415
+
416
+ // Constructor / destructor methods...
417
+ DelimitedJoint4DRV ( ) : Joint4DRV<V1,V2,V3,V4>() { }
418
+ DelimitedJoint4DRV ( const V1& v1, const V2& v2, const V3& v3, const V4& v4 ) : Joint4DRV<V1,V2,V3,V4>(v1,v2,v3,v4) { }
419
+ DelimitedJoint4DRV ( char* ps ) : Joint4DRV<V1,V2,V3,V4>() { ps>>*this>>"\0"; }
420
+ DelimitedJoint4DRV ( const char* ps ) : Joint4DRV<V1,V2,V3,V4>() { strdup(ps)>>*this>>"\0"; }
421
+
422
+ // Specification methods...
423
+ template<class P>
424
+ DelimitedJoint4DRV<SD1,V1,SD2,V2,SD3,V3,SD4,V4,SD5>& setVal ( const ArrayIterator<P>& it ) {
425
+ Joint4DRV<V1,V2,V3,V4>::first.setVal(it.first);
426
+ Joint4DRV<V1,V2,V3,V4>::second.setVal(it.second);
427
+ Joint4DRV<V1,V2,V3,V4>::third.setVal(it.third);
428
+ Joint4DRV<V1,V2,V3,V4>::fourth.setVal(it.fourth);
429
+ return *this;
430
+ }
431
+
432
+ // Extraction methods...
433
+ bool operator==(const DelimitedJoint4DRV<SD1,V1,SD2,V2,SD3,V3,SD4,V4,SD5>& vvvv) const { return Joint4DRV<V1,V2,V3,V4>::operator==(vvvv); }
434
+ bool operator< (const DelimitedJoint4DRV<SD1,V1,SD2,V2,SD3,V3,SD4,V4,SD5>& vvvv) const { return Joint4DRV<V1,V2,V3,V4>::operator< (vvvv); }
435
+
436
+ // Input / output methods...
437
+ friend ostream& operator<< ( ostream& os, const DelimitedJoint4DRV<SD1,V1,SD2,V2,SD3,V3,SD4,V4,SD5>& rv ) { return os<<SD1<<rv.first<<SD2<<rv.second<<SD3<<rv.third<<SD4<<rv.fourth<<SD5; }
438
+ friend String& operator<< ( String& str, const DelimitedJoint4DRV<SD1,V1,SD2,V2,SD3,V3,SD4,V4,SD5>& rv ) { return str<<SD1<<rv.first<<SD2<<rv.second<<SD3<<rv.third<<SD4<<rv.fourth<<SD5; }
439
+ friend pair<StringInput,DelimitedJoint4DRV<SD1,V1,SD2,V2,SD3,V3,SD4,V4,SD5>*> operator>> ( StringInput ps, DelimitedJoint4DRV<SD1,V1,SD2,V2,SD3,V3,SD4,V4,SD5>& rv ) {
440
+ return pair<StringInput,DelimitedJoint4DRV<SD1,V1,SD2,V2,SD3,V3,SD4,V4,SD5>*>(ps,&rv); }
441
+ friend StringInput operator>> ( pair<StringInput,DelimitedJoint4DRV<SD1,V1,SD2,V2,SD3,V3,SD4,V4,SD5>*> delimbuff, const char* psDlm ) {
442
+ if (StringInput(NULL)==delimbuff.first) return delimbuff.first;
443
+ return ( (SD5[0]=='\0') ? delimbuff.first>>SD1>>delimbuff.second->first>>SD2>>delimbuff.second->second>>SD3>>delimbuff.second->third>>SD4>>delimbuff.second->fourth>>psDlm
444
+ : delimbuff.first>>SD1>>delimbuff.second->first>>SD2>>delimbuff.second->second>>SD3>>delimbuff.second->third>>SD4>>delimbuff.second->fourth>>SD5>>psDlm );
445
+ }
446
+ };
447
+
448
+
449
+ ////////////////////////////////////////////////////////////////////////////////
450
+ ////////////////////////////////////////////////////////////////////////////////
451
+ //
452
+ // JointArrayRV<T,I>
453
+ //
454
+ ////////////////////////////////////////////////////////////////////////////////
455
+
456
+ template <int I, class T>
457
+ class JointArrayRV {
458
+ private:
459
+ // Data members...
460
+ T at[I];
461
+ public:
462
+ typedef T ElementType;
463
+
464
+ /*
465
+ // Constructor / destructor methods...
466
+ JointArrayRV ( ) { }
467
+ JointArrayRV ( const T& t ) { for(int i=0;i<I;i++) at[i]=t; }
468
+ */
469
+
470
+ // Static extraction methods...
471
+ static const int SIZE = I;
472
+ static const int getSize ( ) { return I; }
473
+
474
+ // Specification methods...
475
+ T& set (int i) { assert(0<=i); assert(i<I); return at[i]; }
476
+
477
+ // Extraction methods...
478
+ const T& get (int i) const { assert(NULL!=this); assert(0<=i); assert(i<I); return at[i]; }
479
+ bool operator< ( const JointArrayRV<I,T>& a ) const {
480
+ int i;
481
+ for ( i=0; at[i]==a.at[i] && i<I; i++ ) ;
482
+ return ( i<I && at[i]<a.at[i] ) ;
483
+ }
484
+ bool operator== ( const JointArrayRV<I,T>& a ) const {
485
+ int i;
486
+ for ( i=0; at[i]==a.at[i] && i<I; i++ ) ;
487
+ return ( i==I ) ;
488
+ }
489
+ size_t getHashKey ( ) const { size_t k=0; for(int i=0;i<I;i++){k=rotLeft(k,3); k^=get(i).getHashKey(); } return k; }
490
+ };
491
+
492
+ ////////////////////////////////////////////////////////////////////////////////
493
+
494
+ template <int I, char* SD, class T>
495
+ class DelimitedJointArrayRV : public JointArrayRV<I,T> {
496
+ public:
497
+
498
+ static const int NUM_VARS = T::NUM_VARS * I;
499
+
500
+ ////////////////////
501
+ template<class P>
502
+ class ArrayIterator : public StaticSafeArray<I,typename T::template ArrayIterator<P> > {
503
+ public:
504
+ static const int NUM_ITERS = NUM_VARS;
505
+ // static const int NUM_ITERS = (typename T::template ArrayIterator<P>)::NUM_ITERS * I;
506
+ friend ostream& operator<< ( ostream& os, const ArrayIterator<P>& rv ) { for(int i=0;i<I;i++) os<<((i==0)?"":SD)<<rv.get(i); return os; }
507
+ };
508
+
509
+ // Specification methods...
510
+ template<class P>
511
+ DelimitedJointArrayRV<I,SD,T>& setVal ( const ArrayIterator<P>& it ) {
512
+ for(int i=0;i<I;i++) JointArrayRV<I,T>::set(i).setVal(it.get(i)); return *this; }
513
+
514
+ // Extraction methods...
515
+ bool operator==(const DelimitedJointArrayRV<I,SD,T>& a) const { return JointArrayRV<I,T>::operator==(a); }
516
+ bool operator< (const DelimitedJointArrayRV<I,SD,T>& a) const { return JointArrayRV<I,T>::operator<(a); }
517
+
518
+ // Input / output methods...
519
+ friend ostream& operator<< ( ostream& os, const DelimitedJointArrayRV<I,SD,T>& a ) { for(int i=0;i<I;i++) os<<((i==0)?"":SD)<<a.get(i); return os; }
520
+ friend String& operator<< ( String& str, const DelimitedJointArrayRV<I,SD,T>& a ) { for(int i=0;i<I;i++)str<<((i==0)?"":SD)<<a.get(i); return str; }
521
+ friend IStream operator>> ( pair<IStream,DelimitedJointArrayRV<I,SD,T>*> is_x, const char* psDlm ) {
522
+ IStream& is = is_x.first;
523
+ DelimitedJointArrayRV<I,SD,T>& x = *is_x.second;
524
+ if (IStream()==is) return IStream();
525
+ for(int i=0;i<I;i++)
526
+ is = pair<IStream,T*>(is,&x.set(i))>>((i<I-1)?SD:psDlm);
527
+ return is;
528
+ }
529
+
530
+ // OBSOLETE!
531
+ friend pair<StringInput,DelimitedJointArrayRV<I,SD,T>*> operator>> ( StringInput ps, DelimitedJointArrayRV<I,SD,T>& a ) { return pair<StringInput,DelimitedJointArrayRV<I,SD,T>*>(ps,&a); }
532
+ friend StringInput operator>> ( pair<StringInput,DelimitedJointArrayRV<I,SD,T>*> delimbuff, const char* psDlm ) {
533
+ if (StringInput(NULL)==delimbuff.first) return delimbuff.first;
534
+ StringInput psIn = delimbuff.first;
535
+ for(int i=0;i<I;i++)
536
+ psIn = pair<StringInput,T*>(psIn,&delimbuff.second->set(i))>>((i<I-1)?SD:psDlm);
537
+ return psIn;
538
+ }
539
+ };
540
+
541
+
542
+ ///////////////////////////////////////////////////////////////////////////////
543
+ ////////////////////////////////////////////////////////////////////////////////
544
+ //
545
+ // History<T,N>
546
+ //
547
+ ////////////////////////////////////////////////////////////////////////////////
548
+
549
+ template <int N,class T>
550
+ class History {
551
+ private:
552
+ // Data members...
553
+ StaticSafeArray<N,T> at;
554
+ public:
555
+ // Constructor / destructor methods...
556
+ History ( ) { }
557
+ History ( char* ps ) { ps>>*this>>"\0"; }
558
+ /*
559
+ History ( char* ps ) { read(ps); }
560
+ */
561
+ // History ( const string& s ) { read(s.c_str()); }
562
+ // Specification methods...
563
+ void advanceHistory(const T& t) { for(int i=N-1;i>0;i--)at.set(i)=at.get(i-1); at.set(0)=t; }
564
+ T& advanceHistory() { for(int i=N-1;i>0;i--)at.set(i)=at.get(i-1); return at.set(0); }
565
+ T& setBack(int i) { return at.set(i); }
566
+ // Extraction methods...
567
+ const T& getBack(int i) const { assert(i>=0); assert(i<N); return at.get(i); }
568
+ // Input / output methods...
569
+ /*
570
+ void read ( char* ps, const ReaderContext& rc=ReaderContext() ) { char* psT; for(int i=0;i<N;i++){char* z=strtok_r((0==i)?ps:NULL,";",&psT); assert(z); at.set(i).read(z);} }
571
+ //at.set(i).read(strtok_r((0==i)?ps:NULL,";",&psT)); }
572
+ */
573
+
574
+ friend ostream& operator<< ( ostream& os, const History<N,T>& a ) { for(int i=0;i<N;i++)os<<((i==0)?"":";")<<a.getBack(i); return os; }
575
+ friend pair<StringInput,History<N,T>*> operator>> ( StringInput ps, History<N,T>& a ) { return pair<StringInput,History<N,T>*>(ps,&a); }
576
+ friend StringInput operator>> ( pair<StringInput,History<N,T>*> delimbuff, const char* psDlm ) {
577
+ if (StringInput(NULL)==delimbuff.first) return delimbuff.first;
578
+ StringInput psIn = delimbuff.first;
579
+ for(int i=0;i<N;i++)
580
+ psIn = pair<StringInput,T*>(psIn,&delimbuff.second->setBack(i))>>((i<N-1)?";":psDlm);
581
+ return psIn;
582
+ }
583
+
584
+ /*
585
+ void write ( FILE* pf ) const { for(int i=0;i<N;i++) {fprintf(pf,(0==i)?"":";"); at.get(i).write(pf);} }
586
+ */
587
+ };
588
+
589
+ ///////////////////////////////////////////////////////////////////////////////
590
+ ///////////////////////////////////////////////////////////////////////////////
591
+
592
+
593
+ #endif //_NL_RAND_VAR__
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-refrv.h ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ///////////////////////////////////////////////////////////////////////////////
2
+ // //
3
+ // This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
4
+ // //
5
+ // ModelBlocks is free software: you can redistribute it and/or modify //
6
+ // it under the terms of the GNU General Public License as published by //
7
+ // the Free Software Foundation, either version 3 of the License, or //
8
+ // (at your option) any later version. //
9
+ // //
10
+ // ModelBlocks is distributed in the hope that it will be useful, //
11
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of //
12
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
13
+ // GNU General Public License for more details. //
14
+ // //
15
+ // You should have received a copy of the GNU General Public License //
16
+ // along with ModelBlocks. If not, see <http://www.gnu.org/licenses/>. //
17
+ // //
18
+ // ModelBlocks developers designate this particular file as subject to //
19
+ // the "Moses" exception as provided by ModelBlocks developers in //
20
+ // the LICENSE file that accompanies this code. //
21
+ // //
22
+ ///////////////////////////////////////////////////////////////////////////////
23
+
24
+ ////////////////////////////////////////////////////////////
25
+ //template <class T>
26
+ template<class T, map<T,T>& domain>
27
+ class RefRV : public Id<const T*> {
28
+ public:
29
+
30
+ typedef RefRV<T,domain> BaseType;
31
+
32
+ static const int NUM_VARS = 1;
33
+ static const T DUMMY;
34
+
35
+ ////////////////////
36
+ template<class P>
37
+ class ArrayDistrib : public Array<pair<RefRV<T,domain>,P> > {
38
+ };
39
+
40
+ ////////////////////
41
+ template<class P>
42
+ class ArrayIterator : public pair<SafePtr<const ArrayDistrib<P> >,Id<int> > {
43
+ public:
44
+ static const int NUM_ITERS = NUM_VARS;
45
+ operator RefRV<T,domain>() const { return ArrayIterator<P>::first.getRef().get(ArrayIterator<P>::second.toInt()).first; }
46
+ //const DiscreteDomainRV<T,domain>& toRV() { return ArrayIterator<P>::first.getRef().get(ArrayIterator<P>::second.toInt()).first; }
47
+ };
48
+
49
+ // Constructor / destructor methods...
50
+ RefRV ( ) { Id<const T*>::set(NULL); }
51
+ //RefRV ( int i ) { Id<const T*>::set(i); }
52
+ //RefRV ( const T& t ) { Id<const T*>::set(&t); }
53
+ RefRV ( const T& t ) { if(domain.find(t)==domain.end()) *(const_cast<T*>(Id<const T*>::set(&domain[t]).toInt())) = t;
54
+ else Id<const T*>::set(&domain[t]); }
55
+
56
+ // Specification methods...
57
+ template<class P>
58
+ RefRV<T,domain>& setVal ( const ArrayIterator<P>& it ) { *this=it; return *this; }
59
+ //T& setRef ( ) { return Id<const T*>::setRef(); }
60
+
61
+ // Extraction methods...
62
+ const T& getRef ( ) const { return (Id<const T*>::toInt()==NULL) ? DUMMY : *(static_cast<const T*>(Id<const T*>::toInt())); }
63
+ static map<T,T>& setDomain ( ) { return domain; }
64
+
65
+ // Input / output methods..
66
+ friend ostream& operator<< ( ostream& os, const RefRV<T,domain>& rv ) { return os <<&rv.getRef(); } //{ return os<<rv.getRef(); }
67
+ friend String& operator<< ( String& str, const RefRV<T,domain>& rv ) { return str<<"addr"<<(long int)(void*)&rv.getRef(); } //{ return str<<rv.getRef(); }
68
+ friend pair<StringInput,RefRV<T,domain>*> operator>> ( const StringInput ps, RefRV<T,domain>& rv ) { return pair<StringInput,RefRV<T,domain>*>(ps,&rv); }
69
+ friend StringInput operator>> ( pair<StringInput,RefRV<T,domain>*> delimbuff, const char* psDlm ) {
70
+ if (StringInput(NULL)==delimbuff.first) return delimbuff.first;
71
+ return NULL; //psIn;
72
+ }
73
+ };
74
+ template <class T, map<T,T>& domain> const T RefRV<T,domain>::DUMMY;
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-timer.h ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ///////////////////////////////////////////////////////////////////////////////
2
+ // //
3
+ // This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
4
+ // //
5
+ // ModelBlocks is free software: you can redistribute it and/or modify //
6
+ // it under the terms of the GNU General Public License as published by //
7
+ // the Free Software Foundation, either version 3 of the License, or //
8
+ // (at your option) any later version. //
9
+ // //
10
+ // ModelBlocks is distributed in the hope that it will be useful, //
11
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of //
12
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
13
+ // GNU General Public License for more details. //
14
+ // //
15
+ // You should have received a copy of the GNU General Public License //
16
+ // along with ModelBlocks. If not, see <http://www.gnu.org/licenses/>. //
17
+ // //
18
+ // ModelBlocks developers designate this particular file as subject to //
19
+ // the "Moses" exception as provided by ModelBlocks developers in //
20
+ // the LICENSE file that accompanies this code. //
21
+ // //
22
+ ///////////////////////////////////////////////////////////////////////////////
23
+
24
+ #ifndef _NL_TIMER__
25
+ #define _NL_TIMER__
26
+
27
+ #include <sys/time.h>
28
+
29
// Accumulating wall-clock stopwatch built on gettimeofday().
//
// The reference time is taken at construction and again on every start().
// Each pause() adds the wall-clock time since that reference to a running
// total; elapsed() reports the total.  pause() does not move the reference
// time, so calling pause() twice without an intervening start() counts the
// interval since the reference twice over its common part.
class Timer {
 private:
  struct timeval kept;   // accumulated time; tv_usec is kept within [0,1000000)
  struct timeval beg;    // time of construction or of the most recent start()
 public:
  // Starts with an empty total and the reference time set to "now".
  Timer ( )      { gettimeofday(&beg,NULL); kept.tv_sec=0; kept.tv_usec=0; }
  // Resets the reference time to "now"; the accumulated total is untouched.
  void start ( ) { gettimeofday(&beg,NULL); }
  // Adds the time since the reference point to the accumulated total.
  void pause ( ) {
    struct timeval now; gettimeofday(&now,NULL);
    kept.tv_sec  += now.tv_sec  - beg.tv_sec;
    kept.tv_usec += now.tv_usec - beg.tv_usec;
    // The microsecond difference lies strictly between -1000000 and 1000000
    // and kept.tv_usec was normalised before, so one carry or borrow is
    // enough to bring it back into [0,1000000).
    if      ( kept.tv_usec <  0       ) { kept.tv_sec -= 1; kept.tv_usec += 1000000; }
    else if ( kept.tv_usec >= 1000000 ) { kept.tv_sec += 1; kept.tv_usec -= 1000000; }
  }
  // Returns the accumulated total in milliseconds.
  double elapsed ( ) const {
    return (double(kept.tv_sec)*1000.0 + double(kept.tv_usec)/1000.0);
  }
};
50
+
51
+ #endif //_NL_TIMER__
52
+
mosesdecoder/contrib/synlm/hhmm/rvtl/include/nl-tree.h ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ///////////////////////////////////////////////////////////////////////////////
2
+ // //
3
+ // This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. //
4
+ // //
5
+ // ModelBlocks is free software: you can redistribute it and/or modify //
6
+ // it under the terms of the GNU General Public License as published by //
7
+ // the Free Software Foundation, either version 3 of the License, or //
8
+ // (at your option) any later version. //
9
+ // //
10
+ // ModelBlocks is distributed in the hope that it will be useful, //
11
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of //
12
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
13
+ // GNU General Public License for more details. //
14
+ // //
15
+ // You should have received a copy of the GNU General Public License //
16
+ // along with ModelBlocks. If not, see <http://www.gnu.org/licenses/>. //
17
+ // //
18
+ // ModelBlocks developers designate this particular file as subject to //
19
+ // the "Moses" exception as provided by ModelBlocks developers in //
20
+ // the LICENSE file that accompanies this code. //
21
+ // //
22
+ ///////////////////////////////////////////////////////////////////////////////
23
+
24
+ template<class B, class T>
25
+ class Tree : public T {
26
+ private:
27
+ // Data members...
28
+ SimpleHash<B,Tree<B,T>*> apt;
29
+ static const Tree<B,T> tDummy;
30
+ public:
31
+ // Constructor / destructor methods...
32
+ ~Tree ( ) { for(typename SimpleHash<B,Tree<B,T>*>::iterator i=apt.begin(); i!=apt.end(); i++) delete i->second; }
33
+ Tree ( ) { }
34
+ // Tree ( const Tree<T>& t ) { ptL = (t.ptL) ? new Tree<T>(*t.ptL) : NULL;
35
+ // ptR = (t.ptR) ? new Tree<T>(*t.ptR) : NULL; }
36
+ // Extraction methods...
37
+ const bool isTerm ( ) const { return (apt.empty()); }
38
+ const Tree<B,T>& getBranch ( const B& b ) const { return (apt.find(b)!=apt.end()) ? *apt.find(b)->second : tDummy; }
39
+ // Specification methods...
40
+ Tree<B,T>& setBranch ( const B& b ) { if (apt.find(b)==apt.end()) apt[b]=new Tree<B,T>(); return *apt[b]; }
41
+ };
42
+ template<class B, class T> const Tree<B,T> Tree<B,T>::tDummy;// = Tree<B,T>();
43
+
mosesdecoder/contrib/zmert-moses.pl ADDED
@@ -0,0 +1,1121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/perl -w
2
+
3
+ # Usage:
4
+ # zmert-moses.pl <foreign> <english> <decoder-executable> <decoder-config>
5
+ # For other options see below or run 'zmert-moses.pl --help'
6
+
7
+ # Notes:
8
+ # <foreign> and <english> should be raw text files, one sentence per line
9
+ # <english> can be a prefix, in which case the files <english>0, <english>1, etc. are used
10
+
11
+ # Revision history
12
+
13
+ # 29 Dec 2009 Derived from mert-moses-new.pl (Kamil Kos)
14
+
15
+ use FindBin qw($RealBin);
16
+ use File::Basename;
17
+ my $SCRIPTS_ROOTDIR = $RealBin;
18
+ $SCRIPTS_ROOTDIR =~ s/\/training$//;
19
+ $SCRIPTS_ROOTDIR = $ENV{"SCRIPTS_ROOTDIR"} if defined($ENV{"SCRIPTS_ROOTDIR"});
20
+
21
+ # for each _d_istortion, _l_anguage _m_odel, _t_ranslation _m_odel and _w_ord penalty, there is a list
22
+ # of [ default value, lower bound, upper bound ]-triples. In most cases, only one triple is used,
23
+ # but the translation model has currently 5 features
24
+
25
+ # defaults for initial values and ranges are:
26
+
27
+ my $default_triples = {
28
+ # these two basic models exist even if not specified, they are
29
+ # not associated with any model file
30
+ "w" => [ [ 0.0, -1.0, 1.0 ] ], # word penalty
31
+ };
32
+
33
+ my $additional_triples = {
34
+ # if the more lambda parameters for the weights are needed
35
+ # (due to additional tables) use the following values for them
36
+ "d" => [ [ 1.0, 0.0, 2.0 ], # lexicalized reordering model
37
+ [ 1.0, 0.0, 2.0 ],
38
+ [ 1.0, 0.0, 2.0 ],
39
+ [ 1.0, 0.0, 2.0 ],
40
+ [ 1.0, 0.0, 2.0 ],
41
+ [ 1.0, 0.0, 2.0 ],
42
+ [ 1.0, 0.0, 2.0 ] ],
43
+ "lm" => [ [ 1.0, 0.0, 2.0 ] ], # language model
44
+ "g" => [ [ 1.0, 0.0, 2.0 ], # generation model
45
+ [ 1.0, 0.0, 2.0 ] ],
46
+ "tm" => [ [ 0.3, 0.0, 0.5 ], # translation model
47
+ [ 0.2, 0.0, 0.5 ],
48
+ [ 0.3, 0.0, 0.5 ],
49
+ [ 0.2, 0.0, 0.5 ],
50
+ [ 0.0,-1.0, 1.0 ] ], # ... last weight is phrase penalty
51
+ "lex"=> [ [ 0.1, 0.0, 0.2 ] ], # global lexical model
52
+ };
53
+
54
+ # moses.ini file uses FULL names for lambdas, while this training script internally (and on the command line)
55
+ # uses ABBR names.
56
+ my $ABBR_FULL_MAP = "d=weight-d lm=weight-l tm=weight-t w=weight-w g=weight-generation lex=weight-lex";
57
+ my %ABBR2FULL = map {split/=/,$_,2} split /\s+/, $ABBR_FULL_MAP;
58
+ my %FULL2ABBR = map {my ($a, $b) = split/=/,$_,2; ($b, $a);} split /\s+/, $ABBR_FULL_MAP;
59
+
60
+ # We parse moses.ini to figure out how many weights do we need to optimize.
61
+ # For this, we must know the correspondence between options defining files
62
+ # for models and options assigning weights to these models.
63
+ my $TABLECONFIG_ABBR_MAP = "ttable-file=tm lmodel-file=lm distortion-file=d generation-file=g global-lexical-file=lex";
64
+ my %TABLECONFIG2ABBR = map {split(/=/,$_,2)} split /\s+/, $TABLECONFIG_ABBR_MAP;
65
+
66
+ # There are weights that do not correspond to any input file, they just increase the total number of lambdas we optimize
67
+ #my $extra_lambdas_for_model = {
68
+ # "w" => 1, # word penalty
69
+ # "d" => 1, # basic distortion
70
+ #};
71
+
72
+ my $verbose = 0;
73
+ my $___MERT_VERBOSE = 1; # verbosity of zmert (values: 0-2)
74
+ my $___DECODER_VERBOSE = 1; # should decoder output be included? - 0:no,1:yes
75
+ my $___SAVE_INTER = 2; # save intermediate nbest-lists
76
+ my $usage = 0; # request for --help
77
+ my $___WORKING_DIR = "mert-work";
78
+ my $___DEV_F = undef; # required, input text to decode
79
+ my $___DEV_E = undef; # required, basename of files with references
80
+ my $___DECODER = undef; # required, pathname to the decoder executable
81
+ my $___CONFIG = undef; # required, pathname to startup ini file
82
+ my $___N_BEST_LIST_SIZE = 100;
83
+ my $___MAX_MERT_ITER = 0; # do not limit the number of iterations
84
+ my $queue_flags = "-l mem_free=0.5G -hard"; # extra parameters for parallelizer
85
+ # the -l ws0ssmt is relevant only to JHU workshop
86
+ my $___JOBS = undef; # if parallel, number of jobs to use (undef -> serial)
87
+ my $___DECODER_FLAGS = ""; # additional parametrs to pass to the decoder
88
+ my $___LAMBDA = undef; # string specifying the seed weights and boundaries of all lambdas
89
+ my $skip_decoder = 0; # and should we skip the first decoder run (assuming we got interrupted during mert)
90
+ my $___FILTER_PHRASE_TABLE = 1; # filter phrase table
91
+ my $___PREDICTABLE_SEEDS = 0;
92
+ my $___METRIC = "BLEU 4 shortest"; # name of metric that will be used for minimum error training, followed by metric parameters (see zmert documentation)
93
+ my $___SEMPOSBLEU_WEIGHTS = "1 1"; # weights of SemPOS and BLEU
94
+ my $___LAMBDAS_OUT = undef; # file where final lambdas should be written
95
+ my $___EXTRACT_SEMPOS = "none"; # how shall we get the SemPOS factor (only for SemPOS metric)
96
+ # options: 1) 'none' - moses generates SemPOS factor in required format
97
+ # (<word_form>|<SemPOS>)
98
+ # 2) 'factors:<factor_index_list>' - extract factors from decoder output on positions from <factor_index_list>
99
+ # <factor_index_list> contains indices of factors separated by comma, e.g. '0,1,4'
100
+ # 3) 'tmt' - moses outputs only <word_form> and we need to
101
+ # generate factors like SemPOS with TectoMT (see http://ufal.mff.cuni.cz/tectomt/)
102
+
103
+ # set 1 if using with async decoder
104
+ my $___ASYNC = 0;
105
+
106
+ # Use "--norm" to select normalization in mert
107
+ my $___NORM = "none";
108
+
109
+ # set 0 if input type is text, set 1 if input type is confusion network
110
+ my $___INPUTTYPE = 0;
111
+
112
+ my $mertdir = "$SCRIPTS_ROOTDIR/../zmert/"; # path to zmert directory
113
+ my $filtercmd = undef; # path to filter-model-given-input.pl
114
+ my $clonecmd = "$SCRIPTS_ROOTDIR/training/clone_moses_model.pl"; # executable clone_moses_model.pl
115
+ my $qsubwrapper = undef;
116
+ my $moses_parallel_cmd = undef;
117
+ my $old_sge = 0; # assume sge<6.0
118
+ my $___ACTIVATE_FEATURES = undef; # comma-separated (or blank-separated) list of features to work on
119
+ # if undef work on all features
120
+ # (others are fixed to the starting values)
121
+ my %active_features; # hash with features to optimize; optimize all if empty
122
+
123
+ use strict;
124
+ use Getopt::Long;
125
+ GetOptions(
126
+ "working-dir=s" => \$___WORKING_DIR,
127
+ "input=s" => \$___DEV_F,
128
+ "inputtype=i" => \$___INPUTTYPE,
129
+ "refs=s" => \$___DEV_E,
130
+ "decoder=s" => \$___DECODER,
131
+ "config=s" => \$___CONFIG,
132
+ "nbest:i" => \$___N_BEST_LIST_SIZE,
133
+ "maxiter:i" => \$___MAX_MERT_ITER,
134
+ "queue-flags:s" => \$queue_flags,
135
+ "jobs=i" => \$___JOBS,
136
+ "decoder-flags=s" => \$___DECODER_FLAGS,
137
+ "lambdas=s" => \$___LAMBDA,
138
+ "metric=s" => \$___METRIC,
139
+ "semposbleu-weights:s" => \$___SEMPOSBLEU_WEIGHTS,
140
+ "extract-sempos=s" => \$___EXTRACT_SEMPOS,
141
+ "norm:s" => \$___NORM,
142
+ "help" => \$usage,
143
+ "verbose" => \$verbose,
144
+ "mert-verbose:i" => \$___MERT_VERBOSE,
145
+ "decoder-verbose:i" => \$___DECODER_VERBOSE,
146
+ "mertdir:s" => \$mertdir, # allow to override the default location of zmert.jar
147
+ "lambdas-out:s" => \$___LAMBDAS_OUT,
148
+ "rootdir=s" => \$SCRIPTS_ROOTDIR,
149
+ "filtercmd=s" => \$filtercmd, # allow to override the default location
150
+ "qsubwrapper=s" => \$qsubwrapper, # allow to override the default location
151
+ "mosesparallelcmd=s" => \$moses_parallel_cmd, # allow to override the default location
152
+ "old-sge" => \$old_sge, #passed to moses-parallel
153
+ "filter-phrase-table!" => \$___FILTER_PHRASE_TABLE, # allow (disallow)filtering of phrase tables
154
+ "predictable-seeds:s" => \$___PREDICTABLE_SEEDS, # allow (disallow) switch on/off reseeding of random restarts
155
+ "async=i" => \$___ASYNC, #whether script to be used with async decoder
156
+ "activate-features=s" => \$___ACTIVATE_FEATURES #comma-separated (or blank-separated) list of features to work on (others are fixed to the starting values)
157
+ ) or exit(1);
158
+
159
+ print "Predict $___PREDICTABLE_SEEDS\n";
160
+
161
+ # the 4 required parameters can be supplied on the command line directly
162
+ # or using the --options
163
+ if (scalar @ARGV == 4) {
164
+ # required parameters: input_file references_basename decoder_executable
165
+ $___DEV_F = shift;
166
+ $___DEV_E = shift;
167
+ $___DECODER = shift;
168
+ $___CONFIG = shift;
169
+ }
170
+
171
+ if ($___ASYNC) {
172
+ delete $default_triples->{"w"};
173
+ $additional_triples->{"w"} = [ [ 0.0, -1.0, 1.0 ] ];
174
+ }
175
+
176
+ print STDERR "After default: $queue_flags\n";
177
+
178
+ if ($usage || !defined $___DEV_F || !defined$___DEV_E || !defined$___DECODER || !defined $___CONFIG) {
179
+ print STDERR "usage: zmert-moses.pl input-text references decoder-executable decoder.ini
180
+ Options:
181
+ --working-dir=mert-dir ... where all the files are created
182
+ --nbest=100 ... how big nbestlist to generate
183
+ --maxiter=N ... maximum number of zmert iterations
184
+ --jobs=N ... set this to anything to run moses in parallel
185
+ --mosesparallelcmd=STRING ... use a different script instead of moses-parallel
186
+ --queue-flags=STRING ... anything you with to pass to
187
+ qsub, eg. '-l ws06osssmt=true'
188
+ The default is
189
+ -l mem_free=0.5G -hard
190
+ To reset the parameters, please use \"--queue-flags=' '\" (i.e. a space between
191
+ the quotes).
192
+ --decoder-flags=STRING ... extra parameters for the decoder
193
+ --lambdas=STRING ... default values and ranges for lambdas, a complex string
194
+ such as 'd:1,0.5-1.5 lm:1,0.5-1.5 tm:0.3,0.25-0.75;0.2,0.25-0.75;0.2,0.25-0.75;0.3,0.25-0.75;0,-0.5-0.5 w:0,-0.5-0.5'
195
+ --allow-unknown-lambdas ... keep going even if someone supplies a new lambda
196
+ in the lambdas option (such as 'superbmodel:1,0-1'); optimize it, too
197
+ --lambdas-out=STRING ... file where final lambdas should be written
198
+ --metric=STRING ... metric name for optimization with metric parameters
199
+ such as 'BLEU 4 closest' or 'SemPOS 0 1'. Use default parameters by specifying 'BLEU' or 'SemPOS'
200
+ --semposbleu-weights=STRING ... weights for SemPOS and BLEU in format 'N:M' where 'N' is SemPOS weight and 'M' BLEU weight
201
+ used only with SemPOS_BLEU metric
202
+ --extract-sempos=STRING ... none|factors:<factor_list>|tmt
203
+ 'none' ... decoder generates all required factors for optimization metric
204
+ 'factors:<factor_list>' ... extract factors with index in <factor_list> from decoder output
205
+ e.g. 'factors:0,2,3' to extract first, third and fourth factor from decoder output
206
+ 'tmt' ... use TectoMT (see http://ufal.mff.cuni.cz/tectomt) to generate required factors
207
+ --norm ... Select normalization for zmert
208
+ --mert-verbose=N ... verbosity of zmert [0|1|2]
209
+ --decoder-verbose=N ... decoder verbosity [0|1] - 1=decoder output included
210
+ --mertdir=STRING ... directory with zmert.jar
211
+ --filtercmd=STRING ... path to filter-model-given-input.pl
212
+ --rootdir=STRING ... where do helpers reside (if not given explicitly)
213
+ --mertdir=STRING ... path to zmert implementation
214
+ --scorenbestcmd=STRING ... path to score-nbest.py
215
+ --old-sge ... passed to moses-parallel, assume Sun Grid Engine < 6.0
216
+ --inputtype=[0|1|2] ... Handle different input types (0 for text, 1 for confusion network, 2 for lattices, default is 0)
217
+ --no-filter-phrase-table ... disallow filtering of phrase tables
218
+ (useful if binary phrase tables are available)
219
+ --predictable-seeds ... provide predictable seeds to mert so that random restarts are the same on every run
220
+ --activate-features=STRING ... comma-separated list of features to work on
221
+ (if undef work on all features)
222
+ # (others are fixed to the starting values)
223
+ --verbose ... verbosity of this script
224
+ --help ... print this help
225
+
226
+ ";
227
+ exit 1;
228
+ }
229
+
230
# Ensure we know where TectoMT is, if we need it (--extract-sempos=tmt).
if( !defined $ENV{"TMT_ROOT"} && $___EXTRACT_SEMPOS =~ /tmt/) {
  die "Cannot find TMT_ROOT. Is TectoMT really initialized?";
}
# When TectoMT is not requested TMT_ROOT may be unset; fall back to the empty
# string so that building the paths below does not raise "uninitialized value"
# warnings (the script runs under -w). The resulting strings are unchanged.
my $TMT_ROOT = defined $ENV{"TMT_ROOT"} ? $ENV{"TMT_ROOT"} : "";

# Command line that runs the TectoMT scenario over the decoder output.
my $srunblocks = "$TMT_ROOT/tools/srunblocks_streaming/srunblocks";
my $scenario_file = "scenario";
my $qruncmd = "/home/bojar/diplomka/bin/qruncmd";
my $srunblocks_cmd = "$srunblocks --errorlevel=FATAL $scenario_file czech_source_sentence factored_output";
if (defined $___JOBS && $___JOBS > 1) {
  # $qruncmd only wraps the TectoMT command, so a missing qruncmd is fatal
  # only when TectoMT is actually requested; otherwise every parallel run
  # would die on this hard-coded path.
  # NOTE(review): presumably $srunblocks_cmd is used only with
  # --extract-sempos=tmt -- confirm against the rest of the script.
  die "Can't run $qruncmd" if $___EXTRACT_SEMPOS =~ /tmt/ && ! -x $qruncmd;
  $srunblocks_cmd = "$qruncmd --jobs=$___JOBS --join '$srunblocks_cmd'";
}
244
+
245
+
246
+ # update variables if input is confusion network
247
+ if ($___INPUTTYPE == 1)
248
+ {
249
+ $ABBR_FULL_MAP = "$ABBR_FULL_MAP I=weight-i";
250
+ %ABBR2FULL = map {split/=/,$_,2} split /\s+/, $ABBR_FULL_MAP;
251
+ %FULL2ABBR = map {my ($a, $b) = split/=/,$_,2; ($b, $a);} split /\s+/, $ABBR_FULL_MAP;
252
+
253
+ push @{$default_triples -> {"I"}}, [ 1.0, 0.0, 2.0 ];
254
+ #$extra_lambdas_for_model -> {"I"} = 1; #Confusion network posterior
255
+ }
256
+
257
+ # update variables if input is lattice
258
+ if ($___INPUTTYPE == 2)
259
+ {
260
+ # TODO
261
+ }
262
+
263
# Build the set of features to optimize from --activate-features.
# The option is described as a comma-separated (or blank-separated) list, so
# split on commas and whitespace alike and drop empty items (e.g. from a
# leading separator or from ", ").
if (defined $___ACTIVATE_FEATURES)
{
  %active_features = map {$_ => 1} grep { length($_) } split( /[,\s]+/, $___ACTIVATE_FEATURES);
}
267
+
268
+ # Check validity of input parameters and set defaults if needed
269
+
270
+ print STDERR "Using SCRIPTS_ROOTDIR: $SCRIPTS_ROOTDIR\n";
271
+
272
+ # path of script for filtering phrase tables and running the decoder
273
+ $filtercmd="$SCRIPTS_ROOTDIR/training/filter-model-given-input.pl" if !defined $filtercmd;
274
+
275
+ $qsubwrapper="$SCRIPTS_ROOTDIR/generic/qsub-wrapper.pl" if !defined $qsubwrapper;
276
+
277
+ $moses_parallel_cmd = "$SCRIPTS_ROOTDIR/generic/moses-parallel.pl"
278
+ if !defined $moses_parallel_cmd;
279
+
280
+
281
+
282
+ die "Error: need to specify the zmert.jar directory" if !defined $mertdir;
283
+
284
+ my $zmert_classpath = ensure_full_path("$mertdir/zmert.jar");
285
+ die "File not found: $mertdir/zmert.jar (interpreted as $zmert_classpath)"
286
+ if ! -e $zmert_classpath;
287
+
288
+ my ($just_cmd_filtercmd,$x) = split(/ /,$filtercmd);
289
+ die "Not executable: $just_cmd_filtercmd" if ! -x $just_cmd_filtercmd;
290
+ die "Not executable: $moses_parallel_cmd" if defined $___JOBS && ! -x $moses_parallel_cmd;
291
+ die "Not executable: $qsubwrapper" if defined $___JOBS && ! -x $qsubwrapper;
292
+ die "Not executable: $___DECODER" if ! -x $___DECODER;
293
+
294
+ my $input_abs = ensure_full_path($___DEV_F);
295
+ die "File not found: $___DEV_F (interpreted as $input_abs)."
296
+ if ! -e $input_abs;
297
+ $___DEV_F = $input_abs;
298
+
299
+
300
+ # Option to pass to qsubwrapper and moses-parallel
301
+ my $pass_old_sge = $old_sge ? "-old-sge" : "";
302
+
303
+ my $decoder_abs = ensure_full_path($___DECODER);
304
+ die "File not found: $___DECODER (interpreted as $decoder_abs)."
305
+ if ! -x $decoder_abs;
306
+ $___DECODER = $decoder_abs;
307
+
308
+
309
+ my $ref_abs = ensure_full_path($___DEV_E);
310
+ # check if English dev set (reference translations) exist and store a list of all references
311
+ my @references;
312
+ my @references_factored;
313
+ if (-e $ref_abs) {
314
+ push @references, $ref_abs;
315
+ }
316
+ else {
317
+ # if multiple file, get a full list of the files
318
+ my $part = 0;
319
+ while (-e $ref_abs.$part) {
320
+ push @references, $ref_abs.$part;
321
+ $part++;
322
+ }
323
+ die("Reference translations not found: $___DEV_E (interpreted as $ref_abs)") unless $part;
324
+ }
325
+
326
+ my $config_abs = ensure_full_path($___CONFIG);
327
+ die "File not found: $___CONFIG (interpreted as $config_abs)."
328
+ if ! -e $config_abs;
329
+ $___CONFIG = $config_abs;
330
+
331
+
332
+
333
+ # check validity of moses.ini and collect number of models and lambdas per model
334
+ # need to make a copy of $extra_lambdas_for_model, scan_config spoils it
335
+ #my %copy_of_extra_lambdas_for_model = %$extra_lambdas_for_model;
336
+ my %used_triples = %{$default_triples};
337
+ my ($models_used) = scan_config($___CONFIG);
338
+
339
+ # Parse the lambda config string and convert it to a nice structure in the same format as $used_triples
340
+ if (defined $___LAMBDA) {
341
+ my %specified_triples;
342
+ # interpreting lambdas from command line
343
+ foreach (split(/\s+/,$___LAMBDA)) {
344
+ my ($name,$values) = split(/:/);
345
+ die "Malformed setting: '$_', expected name:values\n" if !defined $name || !defined $values;
346
+ foreach my $startminmax (split/;/,$values) {
347
+ if ($startminmax =~ /^(-?[\.\d]+),(-?[\.\d]+)-(-?[\.\d]+)$/) {
348
+ my $start = $1;
349
+ my $min = $2;
350
+ my $max = $3;
351
+ push @{$specified_triples{$name}}, [$start, $min, $max];
352
+ }
353
+ else {
354
+ die "Malformed feature range definition: $name => $startminmax\n";
355
+ }
356
+ }
357
+ }
358
+ # sanity checks for specified lambda triples
359
+ foreach my $name (keys %used_triples) {
360
+ die "No lambdas specified for '$name', but ".($#{$used_triples{$name}}+1)." needed.\n"
361
+ unless defined($specified_triples{$name});
362
+ die "Number of lambdas specified for '$name' (".($#{$specified_triples{$name}}+1).") does not match number needed (".($#{$used_triples{$name}}+1).")\n"
363
+ if (($#{$used_triples{$name}}) != ($#{$specified_triples{$name}}));
364
+ }
365
+ foreach my $name (keys %specified_triples) {
366
+ die "Lambdas specified for '$name' ".(@{$specified_triples{$name}}).", but none needed.\n"
367
+ unless defined($used_triples{$name});
368
+ }
369
+ %used_triples = %specified_triples;
370
+ }
371
+
372
# moses should use our config: refuse decoder flags that would replace the
# config file or any of the model files it lists. A flag is recognized only
# when it starts the string or follows whitespace and is followed by a space.
if ($___DECODER_FLAGS =~ /(^|\s)-(config|f|ttable-file|t|distortion-file|generation-file|lmodel-file|global-lexical-file) /) {
  die "It is forbidden to supply any of -config, -ttable-file, -distortion-file, -generation-file, -lmodel-file or -global-lexical-file in the --decoder-flags.\nPlease use only the --config option to give the config file that lists all the supplementary files.";
}
382
+
383
+ #store current directory and create the working directory (if needed)
384
+ my $cwd = `pawd 2>/dev/null`;
385
+ if(!$cwd){$cwd = `pwd`;}
386
+ chomp($cwd);
387
+
388
+ safesystem("mkdir -p $___WORKING_DIR") or die "Can't mkdir $___WORKING_DIR";
389
+
390
+ {
391
+ # open local scope
392
+
393
+ #chdir to the working directory
394
+ chdir($___WORKING_DIR) or die "Can't chdir to $___WORKING_DIR";
395
+
396
+ # fixed file names
397
+ my $mert_logfile = "zmert.log";
398
+
399
# Prepare the moses.ini the decoder will run with: either a copy filtered
# for the input text (the default) or a plain local clone of the config.
if ($___FILTER_PHRASE_TABLE){
  # filter the phrase tables with respect to the input
  print "filtering the phrase tables... ".`date`;
  my $cmd = "$filtercmd ./filtered $___CONFIG $___DEV_F";
  if (defined $___JOBS) {
    # parallel setup: submit the filtering to the queue through the qsub wrapper
    safesystem("$qsubwrapper $pass_old_sge -command='$cmd' -queue-parameter=\"$queue_flags\" -stdout=filterphrases.out -stderr=filterphrases.err" )
      or die "Failed to submit filtering of tables to the queue (via $qsubwrapper)";
  } else {
    safesystem($cmd) or die "Failed to filter the tables.";
  }

  # the decoder should now use the filtered model
  $___CONFIG = "filtered/moses.ini";
}
else{
  # make a local clone of moses.ini; stop here if that fails, because
  # everything below relies on the cloned file
  safesystem("$clonecmd $___CONFIG")
    or die "Failed to clone $___CONFIG (via $clonecmd)";
  $___CONFIG = "moses.ini";
}
418
+
419
+ $___CONFIG = ensure_full_path($___CONFIG);
420
+
421
+ my $PARAMETERS;
422
+ $PARAMETERS = $___DECODER_FLAGS;
423
+
424
+ my $nbest_file = "zmert.best$___N_BEST_LIST_SIZE.out";
425
+
426
+ # Run zmert to optimize lambdas
427
+ # We need to prepare:
428
+ # 1) decoder launch script (decoder_cmd) - must be executable
429
+ # 2) zmert configuration file (zmert_cfg.txt)
430
+ # 3) parameters we want to optimize (params.txt)
431
+ # 4) decoder configuration file (decoder_cfg_inter.txt)
432
+
433
+
434
+ my $zmert_cfg = ensure_full_path("zmert_cfg.txt");
435
+ my $opt_params = "params.txt"; # zmert requires path relative to launch path
436
+ my $decoder_cfg_inter = "decoder_cfg_inter.txt"; # zmert requires path relative to launch path
437
+ my $decoder_cmd_file = ensure_full_path("decoder_cmd");
438
+ my $iteration_file = "iteration";
439
+
440
+ my $LAMBDAS_FILE = ensure_full_path("finalWeights.txt");
441
+
442
+ # prepare script that will launch moses from template
443
+ # it will include an update script that will adjust feature weights according to
444
+ # the last zmert iteration (they are stored in file $decoder_cfg_inter)
445
+
446
+ # prepare launch command with all parameters
447
+ my $decoder_cmd;
448
+ if (defined $___JOBS) {
449
+ $decoder_cmd = "$moses_parallel_cmd $pass_old_sge -config $___CONFIG -inputtype $___INPUTTYPE -qsub-prefix zmert -queue-parameters '$queue_flags' -decoder-parameters '$PARAMETERS' -n-best-list '$nbest_file $___N_BEST_LIST_SIZE' -input-file $___DEV_F -jobs $___JOBS -decoder $___DECODER > moses.out";
450
+ } else {
451
+ $decoder_cmd = "$___DECODER $PARAMETERS -config $___CONFIG -inputtype $___INPUTTYPE -n-best-list $nbest_file $___N_BEST_LIST_SIZE -i $___DEV_F > moses.out";
452
+ }
453
+
454
+ my $zmert_decoder_cmd = "$SCRIPTS_ROOTDIR/training/zmert-decoder.pl";
455
+
456
+ # number of factors that a given metric requires
457
+ my $metric_num_factors = 1;
458
+
459
# Expand a bare metric name into the metric specification passed to zmert by
# appending default parameters. A metric that already carries parameters
# (anything other than the exact names below) is left untouched.

# SemPOS metric requires 2 parameters specifying position of t_lemma and sempos factor
# e.g. for t_lemma|sempos|factor3|factor4|... the values are 0 and 1 (default setting)
if( $___METRIC =~ /^SemPOS$/) {
  $___METRIC .= " 0 1";
  $metric_num_factors = 2;
}
# SemPOS_BLEU metric requires 7 parameters
# 1) weight of SemPOS 2) weight of BLEU
# 3) index of t_lemma for SemPOS 4) index of sempos for SemPOS
# 5) max ngram for BLEU 6) ref length strategy for BLEU
# 7) index of factor to compute BLEU on
elsif( $___METRIC =~ /^SemPOS_BLEU$/) {
  # Accept the documented 'N:M' form and also two blank-separated values:
  # the latter is the shape of this script's own default ("1 1"), which the
  # colon-only check used to reject.
  $___SEMPOSBLEU_WEIGHTS =~ /^.*:.*$/ || $___SEMPOSBLEU_WEIGHTS =~ /^\S+\s+\S+$/
    or die "--semposbleu-weights is not in format <sempos_weight>:<bleu_weight>";
  $___SEMPOSBLEU_WEIGHTS =~ s/:/ /;
  $___METRIC .= " $___SEMPOSBLEU_WEIGHTS 1 2 4 closest 0";
  $metric_num_factors = 3;
}
elsif( $___METRIC =~ /^BLEU$/) {
  $___METRIC .= " 4 closest";
}
elsif( $___METRIC =~ /^TER$/) {
  $___METRIC .= " nocase punc 20 50";
}
elsif( $___METRIC =~ /^TER-BLEU$/) {
  $___METRIC .= " nocase punc 20 50 4 closest";
}
485
+
486
+ if( $___EXTRACT_SEMPOS =~ /tmt/) {
487
+ my $print_string = "";
488
+ if( $___METRIC =~ /SemPOS_BLEU/) {
489
+ $print_string = "Print::ForSemPOSBLEUMetric TMT_PARAM_PRINT_FOR_SEMPOS_BLEU_METRIC=m:form|t_lemma|gram/sempos TMT_PARAM_PRINT_FOR_SEMPOS_BLEU_METRIC_DESTINATION=factored_output";
490
+ } elsif( $___METRIC =~ /SemPOS/) {
491
+ $print_string = "Print::ForSemPOSMetric TMT_PARAM_PRINT_FOR_SEMPOS_METRIC=t_lemma|gram/sempos TMT_PARAM_PRINT_FOR_SEMPOS_METRIC_DESTINATION=factored_output";
492
+ } else {
493
+ die "Trying to get factors using tmt for unknown metric $___METRIC";
494
+ }
495
+
496
+ open( SCENARIO, ">$scenario_file") or die "Cannot open $scenario_file";
497
+ print SCENARIO << "FILE_EOF";
498
+ SCzechW_to_SCzechM::Tokenize_joining_numbers
499
+ SCzechW_to_SCzechM::TagMorce
500
+ # SCzechM_to_SCzechN::Czech_named_ent_SVM_recognizer
501
+ # SCzechM_to_SCzechN::Geo_ne_recognizer
502
+ # SCzechM_to_SCzechN::Embed_instances
503
+ SCzechM_to_SCzechA::McD_parser_local TMT_PARAM_MCD_CZ_MODEL=pdt20_train_autTag_golden_latin2_pruned_0.02.model
504
+ # SCzechM_to_SCzechA::McD_parser_local TMT_PARAM_MCD_CZ_MODEL=pdt20_train_autTag_golden_latin2_pruned_0.10.model
505
+ SCzechM_to_SCzechA::Fix_atree_after_McD
506
+ SCzechM_to_SCzechA::Fix_is_member
507
+ SCzechA_to_SCzechT::Mark_auxiliary_nodes
508
+ SCzechA_to_SCzechT::Build_ttree
509
+ SCzechA_to_SCzechT::Fill_is_member
510
+ SCzechA_to_SCzechT::Rehang_unary_coord_conj
511
+ SCzechA_to_SCzechT::Assign_coap_functors
512
+ SCzechA_to_SCzechT::Fix_is_member
513
+ SCzechA_to_SCzechT::Distrib_coord_aux
514
+ SCzechA_to_SCzechT::Mark_clause_heads
515
+ SCzechA_to_SCzechT::Mark_relclause_heads
516
+ SCzechA_to_SCzechT::Mark_relclause_coref
517
+ SCzechA_to_SCzechT::Fix_tlemmas
518
+ SCzechA_to_SCzechT::Assign_nodetype
519
+ SCzechA_to_SCzechT::Assign_grammatemes
520
+ SCzechA_to_SCzechT::Detect_formeme
521
+ SCzechA_to_SCzechT::Add_PersPron
522
+ SCzechA_to_SCzechT::Mark_reflpron_coref
523
+ SCzechA_to_SCzechT::TBLa2t_phaseFd
524
+ $print_string
525
+ FILE_EOF
526
+ close( SCENARIO);
527
+ }
528
+
529
+ my $feats_order = join( " ", keys %used_triples);
530
+
531
+ open( DECODER_CMD, ">$decoder_cmd_file") or die "Cannot open $decoder_cmd_file";
532
+ print DECODER_CMD <<"FILE_EOF";
533
+ #!/usr/bin/perl -w
534
+
535
+ use strict;
536
+
537
+ my %FULL2ABBR = map {my (\$a, \$b) = split/=/,\$_,2; (\$b, \$a);} split /\\s+/, "$ABBR_FULL_MAP";
538
+
539
+ open( ITERATION, "<$iteration_file") or die "Cannot open $iteration_file";
540
+ my \$iteration = <ITERATION>;
541
+ close( ITERATION);
542
+ chomp( \$iteration);
543
+
544
+ my \@features_order = qw( $feats_order );
545
+
546
+ # extract feature weights from last zmert iteration (stored in \$decoder_cfg_inter)
547
+ print "Updating decoder config file from file $decoder_cfg_inter\n";
548
+
549
+ my \$moses_ini = "$___CONFIG";
550
+
551
+ open( IN, "$decoder_cfg_inter") or die "Cannot open file $decoder_cfg_inter (reading updated lambdas)";
552
+ FILE_EOF
553
+
554
+ print DECODER_CMD <<'FILE_EOF';
555
+ my %lambdas = ();
556
+ my $lastName = "";
557
+ while( my $line = <IN>) {
558
+ chomp($line);
559
+ my ($name, $val) = split( /\s+/, $line);
560
+ $name =~ s/_\d+$//; # remove index of the lambda
561
+ push( @{$lambdas{$name}}, $val);
562
+ }
563
+ close(IN);
564
+
565
+
566
+ my $moses_ini_old = "$moses_ini";
567
+ $moses_ini_old =~ s/^(.*)\/([^\/]+)$/$1\/run$iteration.$2/;
568
+ $moses_ini_old = $moses_ini.".orig" if( $iteration == 0);
569
+ safesystem("mv $moses_ini $moses_ini_old");
570
+ # update moses.ini
571
+ open( INI_OLD, "<$moses_ini_old") or die "Cannot open config file $moses_ini_old";
572
+ open( INI, ">$moses_ini") or die "Cannot open config file $moses_ini";
573
+ while( my $line = <INI_OLD>) {
574
+ if( $line =~ m/^\[(weight-.+)\]$/) {
575
+ my $name = $FULL2ABBR{$1};
576
+ print STDERR "Updating weight: $1, $name\n";
577
+ print INI "$line";
578
+ foreach( @{$lambdas{$name}}) {
579
+ print INI "$_\n";
580
+ print STDERR "NEW: $_\tOLD:";
581
+ $line = <INI_OLD>;
582
+ print STDERR $line;
583
+ }
584
+ } else {
585
+ print INI $line;
586
+ }
587
+ }
588
+ close(INI_OLD);
589
+ close(INI);
590
+
591
+ FILE_EOF
592
+
593
+ print DECODER_CMD <<"FILE_EOF";
594
+ print "Executing: $decoder_cmd";
595
+ safesystem("$decoder_cmd") or die "Failed to execute $decoder_cmd";
596
+
597
+ # update iteration number in intermediate config file
598
+ ++\$iteration;
599
+ safesystem("echo \$iteration > $iteration_file");
600
+
601
+ # modify the nbest-list to conform the zmert required format
602
+ # <i> ||| <candidate_translation> ||| featVal_1 featVal_2 ... featVal_m
603
+ my \$nbest_file_orig = "$nbest_file".".orig";
604
+ safesystem( "mv $nbest_file \$nbest_file_orig");
605
+ open( NBEST_ORIG, "<\$nbest_file_orig") or die "Cannot open original nbest-list \$nbest_file_orig";
606
+ open( NBEST, ">$nbest_file") or die "Cannot open modified nbest-list $nbest_file";
607
+
608
+ my \$line_num = 0;
609
+
610
+ FILE_EOF
611
+
612
+
613
+ if( "$___EXTRACT_SEMPOS" =~ /factors/) {
614
+ print DECODER_CMD <<"FILE_EOF";
615
+ my (undef, \$args) = split( /:/, "$___EXTRACT_SEMPOS");
616
+ my \$factor_count = $metric_num_factors;
617
+ FILE_EOF
618
+ print DECODER_CMD <<'FILE_EOF';
619
+ my @indices = split( /,/, $args);
620
+ die "Specified ".scalar @indices." factors to extract but selected metric requires $factor_count factors"
621
+ if( @indices != $factor_count);
622
+ while( my $line = <NBEST_ORIG>) {
623
+ my @array = split( /\|\|\|/, $line);
624
+ # remove feature names from the feature scores string
625
+ $array[2] = extractScores( $array[2]);
626
+ my @tokens = split( /\s/, $array[1]); # split sentence into words
627
+ $array[1] = "";
628
+ foreach my $token (@tokens) {
629
+ next if $token eq "";
630
+ my @factors = split( /\|/, $token);
631
+ my $put_separator = 0;
632
+ foreach my $index (@indices) {
633
+ die "Cannot extract factor with index $index from '$token'" if ($index > $#factors);
634
+ $array[1] .= '|' if ($put_separator); # separator between factors
635
+ $array[1] .= $factors[$index];
636
+ $put_separator = 1;
637
+ }
638
+ $array[1] .= " "; # space between words
639
+ }
640
+ print NBEST join( '|||', @array);
641
+ }
642
+
643
+ FILE_EOF
644
+
645
+ } elsif( "$___EXTRACT_SEMPOS" =~ /tmt/) {
646
+ print DECODER_CMD <<"FILE_EOF";
647
+ # run TectoMT to analyze sentences
648
+ print STDERR "Analyzing candidates using $srunblocks_cmd\n";
649
+ my \$nbest_factored = "$nbest_file.factored";
650
+ open( NBEST_FACTORED, "|$srunblocks_cmd > \$nbest_factored") or die "Cannot open pipe to command $srunblocks_cmd";
651
+ FILE_EOF
652
+ print DECODER_CMD <<'FILE_EOF';
653
+ my $line_count = 0;
654
+ my @out = ();
655
+ while( my $line = <NBEST_ORIG>) {
656
+ my @array = split( /\|\|\|/, $line);
657
+ die "Nbest-list does not have required format (values separated by '|||')" if ($#array != 3);
658
+ # remove feature names from the feature scores string
659
+ $array[2] = extractScores( $array[2]);
660
+ push( @out, \@array); # store line with scores for output
661
+ # select only word forms
662
+ my $sentence = "";
663
+ foreach my $fact ( split /\s+/, $array[1]) {
664
+ next if( $fact eq "");
665
+ my @fact_array = split( /\|/, $fact);
666
+ $sentence .= "$fact_array[0] ";
667
+ }
668
+ # analyze sentence via TectoMT using scenario
669
+ print NBEST_FACTORED "$sentence\n";
670
+ ++$line_count;
671
+ }
672
+ close( NBEST_ORIG);
673
+ close( NBEST_FACTORED);
674
+
675
+ open( NBEST_FACTORED, "<$nbest_factored") or die "Cannot open $nbest_factored";
676
+ my $line_count_check = 0;
677
+ while( my $line = <NBEST_FACTORED>) {
678
+ chomp( $line);
679
+ my $array_ref = shift( @out);
680
+ $array_ref->[1] = $line;
681
+ print NBEST join( '|||', @{$array_ref});
682
+ ++$line_count_check;
683
+ }
684
+ die "Error: Sent $line_count sentences to analyze but got only $line_count_check back"
685
+ if( $line_count != $line_count_check);
686
+
687
+ FILE_EOF
688
+
689
+ } elsif ($___EXTRACT_SEMPOS eq "none") {
690
+ print DECODER_CMD <<'FILE_EOF';
691
+ while( my $line = <NBEST_ORIG>) {
692
+ my @array = split( /\|\|\|/, $line);
693
+ # remove feature names from the feature scores string
694
+ $array[2] = extractScores( $array[2]);
695
+ print NBEST join( '|||', @array);
696
+ }
697
+ FILE_EOF
698
+ } else {
699
+ die "Unknown type of factor extraction: $___EXTRACT_SEMPOS";
700
+ }
701
+
702
+ print DECODER_CMD <<'FILE_EOF';
703
+ close( NBEST);
704
+ close( NBEST_ORIG);
705
+
706
+ # END OF BODY
707
+
708
+ sub extractScores {
709
+ my $scores = shift;
710
+ my (%scores_hash, $name);
711
+ foreach my $score_or_name (split /\s+/, $scores) {
712
+ if( $score_or_name =~ s/://) {
713
+ $name = $score_or_name;
714
+ } elsif ($score_or_name =~ /\d/) {
715
+ die "Cannot guess nbest-list first feature score name" if( not defined $name);
716
+ $scores_hash{$name} .= "$score_or_name ";
717
+ } else {
718
+ die "Unknown string ($score_or_name) in nbest-list feature scores section (not a feature name or score)"
719
+ if( $score_or_name =~ /\S/);
720
+ }
721
+ }
722
+ $scores = "";
723
+ foreach $name (@features_order) {
724
+ $scores .= $scores_hash{$name};
725
+ }
726
+ #print STDERR "REORDERED SCORES: $scores\n";
727
+ return $scores;
728
+ }
729
+
730
+ sub safesystem {
731
+ print STDERR "Executing: @_\n";
732
+ system(@_);
733
+ if ($? == -1) {
734
+ print STDERR "Failed to execute: @_\n $!\n";
735
+ exit(1);
736
+ }
737
+ elsif ($? & 127) {
738
+ printf STDERR "Execution of: @_\n died with signal %d, %s coredump\n",
739
+ ($? & 127), ($? & 128) ? 'with' : 'without';
740
+ exit(1);
741
+ }
742
+ else {
743
+ my $exitcode = $? >> 8;
744
+ print STDERR "Exit code: $exitcode\n" if $exitcode;
745
+ return ! $exitcode;
746
+ }
747
+ }
748
+ FILE_EOF
749
+
750
+ close( DECODER_CMD);
751
+
752
+ # make the decoder launch script executable
753
+ safesystem("chmod a+x $decoder_cmd_file");
754
+
755
+ # analyze reference if necessary
756
+ if( $___EXTRACT_SEMPOS =~ /tmt/) {
757
+ my $part = 0;
758
+ foreach my $ref (@references) {
759
+ my $line_count = 0;
760
+ print STDERR "Analyzing references using $srunblocks_cmd\n";
761
+ open( REF_IN, "<$ref") or die "Cannot open $ref";
762
+ my $ref_factored = "$ref.factored.$part";
763
+ push( @references_factored, $ref_factored);
764
+ open( REF_FACTORED, "|$srunblocks_cmd > $ref_factored");
765
+ while( my $line = <REF_IN>) {
766
+ # analyze sentence via TectoMT using scenario in file $scenario_file
767
+ print REF_FACTORED $line;
768
+ ++$line_count;
769
+ }
770
+ close( REF_IN);
771
+ close( REF_FACTORED);
772
+ my $line_count_check = 0;
773
+ open( REF_FACTORED, "<$ref_factored") or die "Cannot open $ref_factored";
774
+ ++$line_count_check while( <REF_FACTORED>);
775
+ die "Error: Sent $line_count sentences to analyze but got $line_count_check back"
776
+ if( $line_count != $line_count_check);
777
+ close( REF_FACTORED);
778
+ ++$part;
779
+ }
780
+ print STDERR "References analyzed\n";
781
+ } else {
782
+ push( @references_factored, @references);
783
+ }
784
+
785
+ my $ref_stem = $references_factored[0];
786
+ $ref_stem =~ s/\d+$// if( $#references_factored); # get the file stem if we have more than one refs
787
+ $ref_stem =~ s/.*\/([^\/]+)$/..\/$1/;
788
+
789
+ # prepare zmert configuration file
790
+ open( ZMERT_CFG, ">$zmert_cfg") or die "Cannot open $zmert_cfg";
791
+
792
+ # FILES
793
+ # print ZMERT_CFG "-dir\t$___PATH_FROM_LAUNCHDIR\n"; # working path (relative to the lauch path)
794
+ # print ZMERT_CFG "-r\t$___DEV_E\n"; # file(s) containing references
795
+ print ZMERT_CFG "-r\t$ref_stem\n"; # file(s) containing references
796
+ print ZMERT_CFG "-rps\t".scalar(@references)."\n"; # number of references per sentence
797
+ print ZMERT_CFG "-txtNrm\t0\n"; # we use our own text normalization
798
+ print ZMERT_CFG "-p\t$opt_params\n"; # file containig parameter names, initial values, ranges
799
+ print ZMERT_CFG "-fin\t$___LAMBDAS_OUT\n" if(defined $___LAMBDAS_OUT); # file where the final weight vector is written
800
+
801
+ # MERT CONFIGURATION
802
+ print ZMERT_CFG "-m\t$___METRIC\n";
803
+ print ZMERT_CFG "-maxIt\t$___MAX_MERT_ITER\n" if( $___MAX_MERT_ITER); # maximum number of MERT iterations
804
+ # print ZMERT_CFG "-prevIt\t$PREV_MERT_ITER\n";
805
+ # number of iteration before considering an early exit
806
+ # print ZMERT_CFG "-minIt\t$MIN_MERT_ITER\n";
807
+ # number of consecutive iterations that must satisfy some early stopping
808
+ # criterion to cause an early exit
809
+ # print ZMERT_CFG "-stopIt\t$STOP_MIN_ITER\n";
810
+ # early exit criterion: no weight changes by more than $LAMBDA_CHANGE;
811
+ # default value: -1 (this criterion is never investigated)
812
+ # print ZMERT_CFG "-stopSig\t$LAMBDA_CHANGE\n";
813
+ # save intermediate decoder config files (1) or decoder outputs (2) or both (3) or neither (0)
814
+ print ZMERT_CFG "-save\t$___SAVE_INTER\n";
815
+ # print ZMERT_CFG "-ipi\t$INITS_PER_ITER\n"; # number of intermediate initial points per iteration
816
+ # print ZMERT_CFG "-opi\t$ONCE_PER_ITER\n"; # modify a parameter only once per iteration;
817
+ # print ZMERT_CFG "-rand\t$RAND_INIT\n"; # choose initial points randomly
818
+ print ZMERT_CFG "-seed\t$___PREDICTABLE_SEEDS\n" if($___PREDICTABLE_SEEDS); # initialize the random number generator
819
+
820
+ # DECODER SPECIFICATION
821
+ print ZMERT_CFG "-cmd\t$decoder_cmd_file\n"; # name of file containing commands to run the decoder
822
+ print ZMERT_CFG "-decOut\t$nbest_file\n"; # name of the n-best file produced by the decoder
823
+ # print ZMERT_CFG "-decExit\t$DECODER_EXIT_CODE\n"; # value returned by decoder after successful exit
824
+ print ZMERT_CFG "-dcfg\t$decoder_cfg_inter\n"; # name of intermediate decoder configuration file
825
+ print ZMERT_CFG "-N\t$___N_BEST_LIST_SIZE\n";
826
+
827
+ # OUTPUT SPECIFICATION
828
+ print ZMERT_CFG "-v\t$___MERT_VERBOSE\n"; # zmert verbosity level (0-2)
829
+ print ZMERT_CFG "-decV\t$___DECODER_VERBOSE\n"; # decoder output printed (1) or ignored (0)
830
+
831
+ close( ZMERT_CFG);
832
+
833
+ my ($name, $num, $val, $min, $max);
834
+ # prepare file with parameters to optimize
835
+ open( PARAMS, ">$opt_params") or die "Cannot open file $opt_params with parameters to optimize";
836
+ my $optString;
837
+ foreach $name (keys %used_triples) {
838
+ $num = 0;
839
+ foreach my $triple (@{$used_triples{$name}}) {
840
+ ($val, $min, $max) = @$triple;
841
+ my ($minRand, $maxRand) = ($min, $max);
842
+ # the file should describe features to optimize in the following format:
843
+ # "featureName ||| defValue optString minVal maxVal minRandVal maxRandVal"
844
+ # optString can be 'Opt' or 'Fix'
845
+ $optString = "Opt";
846
+ if( defined $___ACTIVATE_FEATURES and not $active_features{$name."_$num"}) {
847
+ $optString = "Fix";
848
+ }
849
+ print PARAMS "$name"."_$num ||| $val $optString $min $max $minRand $maxRand\n";
850
+ ++$num;
851
+ }
852
+ }
853
+ print PARAMS "normalization = $___NORM\n";
854
+ close( PARAMS);
855
+
856
+ # prepare intermediate config file from which moses.ini will be updated before each launch
857
+ open( DEC_CFG, ">$decoder_cfg_inter") or die "Cannot open file $decoder_cfg_inter";
858
+ foreach $name (keys %used_triples) {
859
+ $num = 0;
860
+ foreach my $tri (@{$used_triples{$name}}) {
861
+ ($val, $min, $max) = @$tri;
862
+ print DEC_CFG $name."_$num $val\n";
863
+ ++$num;
864
+ }
865
+ }
866
+ close( DEC_CFG);
867
+
868
+ open( ITER, ">$iteration_file") or die "Cannot open file $iteration_file";
869
+ print ITER "1";
870
+ close( ITER);
871
+
872
+ # launch zmert
873
+ my $javaMaxMem = ""; # -maxMem 4000" # use at most 4000MB of memory
874
+ my $cmd = "java -cp $zmert_classpath ZMERT $javaMaxMem $zmert_cfg";
875
+
876
+ print "Zmert start at ".`date`;
877
+
878
+ if ( 0 && defined $___JOBS) {
879
+ # NOT WORKING - this branch needs to init environment variables
880
+ safesystem("$qsubwrapper $pass_old_sge -command='$cmd' -stderr=$mert_logfile -queue-parameter='$queue_flags'") or die "Failed to start zmert (via qsubwrapper $qsubwrapper)";
881
+
882
+ } else {
883
+ safesystem("$cmd 2> $mert_logfile") or die "Failed to run zmert";
884
+ }
885
+
886
+ print "Zmert finished at ".`date`;
887
+
888
+ # RELEVANT ONLY FOR PLAYGROUND at UFAL, CHARLES UNIVERSITY IN PRAGUE
889
+ # copy optimized moses.ini and original run1.moses.ini to the working directory
890
+ if( $___FILTER_PHRASE_TABLE) {
891
+ my ($config_opt, $config_std, $config_base) = ($___CONFIG, $___CONFIG, "$cwd/moses.abs.ini");
892
+ $config_std =~ s/^(.*)\/([^\/]+)$/$1\/run1.$2/;
893
+ mergeConfigs( $config_base, $___CONFIG);
894
+ mergeConfigs( $config_base, $config_std);
895
+ }
896
+
897
+ # chdir back to the original directory # useless, just to remind we were not there
898
+ chdir($cwd);
899
+
900
+
901
+ } # end of local scope
902
+
903
# Merge two config files that share the same line layout.
#
# Both files are read in lockstep.  Lines are copied from $config_base, except
# inside "[weight-...]" sections, where the section header and the following
# lines containing a digit are copied from $config_weights instead.
#
# The result is written to the current directory under the file name
# (directory part stripped) of $config_weights.
#
# Dies if a file cannot be opened or if a "[weight-" header in $config_base
# has no matching header on the same line of $config_weights.
sub mergeConfigs {
  my ($config_base, $config_weights) = @_;
  my $config_new = $config_weights;
  $config_new =~ s/^.*\///;

  # three-argument open with lexical handles: file names are never
  # interpreted as open modes and the handles cannot clash with globals
  open(my $base_fh, "<", $config_base) or die "Cannot open $config_base";
  open(my $weights_fh, "<", $config_weights) or die "Cannot open $config_weights";
  open(my $new_fh, ">", $config_new) or die "Cannot open $config_new";

  while (1) {
    my $b_line = <$base_fh>;
    my $w_line = <$weights_fh>;
    # base file exhausted: nothing left to copy
    last if !defined $b_line;
    # remember now whether the weights file still had a line; the section
    # handling below reads further lines into $w_line
    my $weights_had_line = defined $w_line;

    if ($b_line =~ /^\[weight-/) {
      if (!defined $w_line || $w_line !~ /^\[weight-/) {
        die "mergeConfigs: $config_base and $config_weights do not have the same format";
      }
      print $new_fh $w_line;
      $b_line = <$base_fh>; $w_line = <$weights_fh>;
      # copy the weight values (lines containing a digit) from the weights file
      while (defined $w_line && $w_line =~ /\d/) {
        print $new_fh $w_line;
        $b_line = <$base_fh>; $w_line = <$weights_fh>;
      }
      # first line after the section comes from the base file again
      print $new_fh $b_line if defined $b_line;
    } else {
      print $new_fh $b_line;
    }

    # stop as soon as the weights file ran out at the start of this round
    last if !$weights_had_line;
  }

  close $base_fh;
  close $weights_fh;
  close $new_fh;
}
933
+
934
# Walk a hash of feature name => list of [value, min, max] triples.
# NOTE: each triple is only unpacked; nothing is printed or returned, so this
# routine currently has no observable effect.
sub dump_triples {
  my ($triples) = @_;

  for my $feature (keys %{$triples}) {
    for my $entry (@{ $triples->{$feature} }) {
      my ($start, $lower, $upper) = @{$entry};
    }
  }
}
943
+
944
# Run a command via system() and report problems on STDERR.
#
# Arguments: passed unchanged to system().
# Returns:   true if the command exited with status 0, false otherwise
#            (the non-zero exit code is logged).
# Exits the whole script with status 1 if the command could not be started
# or was terminated by a signal.
sub safesystem {
  print STDERR "Executing: @_\n";
  system(@_);

  if ($? == -1) {
    print STDERR "Failed to execute: @_\n $!\n";
    exit(1);
  }

  if ($? & 127) {
    # the command text is passed as an argument, not spliced into the format
    # string, so a '%' inside the command cannot be taken as a conversion
    printf STDERR "Execution of: %s\n died with signal %d, %s coredump\n",
      "@_", ($? & 127), ($? & 128) ? 'with' : 'without';
    exit(1);
  }

  my $exitcode = $? >> 8;
  print STDERR "Exit code: $exitcode\n" if $exitcode;
  return ! $exitcode;
}
962
+
963
# Turn a path into an absolute one.
#
# The first "/nfsmnt" component is always stripped.  A path that is then
# already absolute is returned as is (without further normalization).
# Otherwise the current directory (from `pawd`, falling back to `pwd`) is
# prepended and "/./", repeated slashes and "dir/.." pairs are collapsed.
sub ensure_full_path {
  my ($path) = @_;
  $path =~ s/\/nfsmnt//;
  if ($path =~ /^\//) {
    return $path;
  }

  my $cwd = `pawd 2>/dev/null`;
  $cwd = `pwd` unless $cwd;
  chomp($cwd);

  $path = $cwd."/".$path;
  $path =~ s/[\r\n]//g;
  $path =~ s/\/\.\//\//g;
  $path =~ s/\/+/\//g;

  # resolve "dir/../" pairs; bounded to at most ten rounds
  my $rounds = 0;
  while ($path =~ /\/\.\.\// && $rounds++<10) {
    $path =~ s/\/+/\//g;
    $path =~ s/\/[^\/]+\/\.\.\//\//g;
  }

  $path =~ s/\/[^\/]+\/\.\.$//;   # trailing "dir/.."
  $path =~ s/\/+$//;              # trailing slashes
  $path =~ s/\/nfsmnt//;
  return $path;
}
984
+
985
# Scan a moses.ini file, check the model files it refers to and collect the
# weight triples ([start, min, max]) that have to be optimized.
#
# Argument: path of the ini file.
# Returns:  reference to a hash  short model name => number of files defined.
# Side effects: appends to the file-level %used_triples; prints progress to
#               STDOUT/STDERR; exits with status 1 if any check failed.
# Relies on the file-level %TABLECONFIG2ABBR, $additional_triples,
# $___LAMBDA, $___ASYNC and $verbose.
sub scan_config {
  my $ini = shift;
  my $inishortname = $ini; $inishortname =~ s/^.*\///; # for error reporting
  # we get a pre-filled counts, because some lambdas are always needed (word penalty, for instance)
  # as we walk through the ini file, we record how many extra lambdas we need
  # and finally, we report it

  # in which field (counting from zero) is the filename to check?
  my %where_is_filename = (
    "ttable-file" => 4,
    "generation-file" => 3,
    "lmodel-file" => 3,
    "distortion-file" => 3,
    "global-lexical-file" => 1,
  );
  # by default, each line of each section means one lambda, but some sections
  # explicitly state a custom number of lambdas
  my %where_is_lambda_count = (
    "ttable-file" => 3,
    "generation-file" => 2,
    "distortion-file" => 2,
  );

  # three-argument open: the file name is never interpreted as a mode
  open(my $ini_fh, "<", $ini) or die "Can't read $ini";
  my $section = undef; # name of the section we are reading
  my $shortname = undef; # the corresponding short name
  my $nr = 0;
  my $error = 0;
  my %defined_files;
  my %defined_steps;  # check the ini file for compatible mapping steps and actually defined files
  while (<$ini_fh>) {
    $nr++;
    next if /^\s*#/; # skip comments
    if (/^\[([^\]]*)\]\s*$/) {
      $section = $1;
      $shortname = $TABLECONFIG2ABBR{$section};
      next;
    }
    if (defined $section && $section eq "mapping") {
      # keep track of mapping steps used
      $defined_steps{$1}++ if /^([TG])/ || /^\d+ ([TG])/;
    }
    if (defined $section && defined $where_is_filename{$section}) {
      print "$section -> $where_is_filename{$section}\n";
      # this ini section is relevant to lambdas
      chomp;
      my @flds = split / +/;
      my $fn = $flds[$where_is_filename{$section}];
      if (defined $fn && $fn !~ /^\s+$/) {
        print "checking weight-count for $section\n";
        # this is a filename! check it
        if ($fn !~ /^\//) {
          $error = 1;
          print STDERR "$inishortname:$nr:Filename not absolute: $fn\n";
        }
        if (! -s $fn && ! -s "$fn.gz" && ! -s "$fn.binphr.idx" && ! -s "$fn.binlexr.idx" ) {
          $error = 1;
          print STDERR "$inishortname:$nr:File does not exist or empty: $fn\n";
        }
        # remember the number of files used, to know how many lambdas do we need
        die "No short name was defined for section $section!"
          if ! defined $shortname;

        # how many lambdas does this model need?
        # either specified explicitly, or the default, i.e. one
        my $needlambdas = defined $where_is_lambda_count{$section} ? $flds[$where_is_lambda_count{$section}] : 1;

        # number of default triples available for this model; computed without
        # dereferencing an undefined entry, so the diagnostic below can always
        # be printed
        my $have_defaults = defined $additional_triples->{$shortname};
        my $available = $have_defaults ? scalar(@{$additional_triples->{$shortname}}) : 0;

        print STDERR "Config needs $needlambdas lambdas for $section (i.e. $shortname)\n" if $verbose;
        if (!defined $___LAMBDA && (!$have_defaults || $available < $needlambdas)) {
          print STDERR "$inishortname:$nr:Your model $shortname needs $needlambdas weights but we define the default ranges for only "
            .$available." weights. Cannot use the default, you must supply lambdas by hand.\n";
          $error = 1;
        }
        else {
          # note: table may use less parameters than the maximum number
          # of triples
          for(my $lambda=0;$lambda<$needlambdas;$lambda++) {
            my ($start, $min, $max)
              = @{${$additional_triples->{$shortname}}[$lambda]};
            push @{$used_triples{$shortname}}, [$start, $min, $max];
          }
        }
        $defined_files{$shortname}++;
      }
    }
  }
  die "$inishortname: File was empty!" if !$nr;
  close $ini_fh;

  # the number of translation/generation tables must match the [mapping] steps
  for my $pair (qw/T=tm=translation G=g=generation/) {
    my ($tg, $shortname, $label) = split /=/, $pair;
    $defined_files{$shortname} = 0 if ! defined $defined_files{$shortname};
    $defined_steps{$tg} = 0 if ! defined $defined_steps{$tg};

    if ($defined_files{$shortname} != $defined_steps{$tg}) {
      print STDERR "$inishortname: You defined $defined_files{$shortname} files for $label but use $defined_steps{$tg} in [mapping]!\n";
      $error = 1;
    }
  }

  # distance-based distortion
  if ($___ASYNC == 1)
  {
    # one distortion and one word-penalty triple per translation step
    print STDERR "ASYNC distortion & word penalty";
    my @my_array;
    for(my $i=0 ; $i < $defined_steps{"T"} ; $i++)
    {
      push @my_array, [ 1.0, 0.0, 2.0 ];
    }
    push @{$used_triples{"d"}}, @my_array;

    @my_array = ();
    for(my $i=0 ; $i < $defined_steps{"T"} ; $i++)
    {
      push @my_array, [ 0.5, -1.0, 1.0 ];
    }
    push @{$used_triples{"w"}}, @my_array;

    # debug print
    print "distortion:";
    my $refarray=$used_triples{"d"};
    my @vector=@$refarray;
    foreach my $subarray (@vector) {
      my @toto=@$subarray;
      print @toto,"\n";
    }
    #exit 1;
  }
  else
  {
    print STDERR "SYNC distortion";
    push @{$used_triples{"d"}}, [1.0, 0.0, 2.0];
  }


  exit(1) if $error;
  return (\%defined_files);
}
mosesdecoder/moses/TranslationModel/UG/TargetPhraseCollectionCache.h ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // -*- c++ -*-
2
+ #pragma once
3
+ #include <time.h>
4
+ #include "moses/TargetPhraseCollection.h"
5
+ #include <boost/atomic.hpp>
6
+ #include "mm/ug_typedefs.h"
7
+ namespace Moses
8
+ {
9
+
10
+ class TPCollWrapper;
11
+
12
+ class TPCollCache
13
+ {
14
+ public:
15
+ // typedef boost::unordered_map<uint64_t, SPTR<TPCollWrapper> > cache_t;
16
+ typedef std::map<uint64_t, SPTR<TPCollWrapper> > cache_t;
17
+ private:
18
+ uint32_t m_capacity; // capacity of cache
19
+ cache_t m_cache; // maps from ids to items
20
+ cache_t::iterator m_qfirst, m_qlast;
21
+ mutable boost::shared_mutex m_lock;
22
+ public:
23
+ TPCollCache(size_t capacity=10000);
24
+
25
+ SPTR<TPCollWrapper>
26
+ get(uint64_t key, size_t revision);
27
+
28
+ };
29
+
30
+ // wrapper around TargetPhraseCollection with reference counting
31
+ // and additional members for caching purposes
32
+ class TPCollWrapper
33
+ : public TargetPhraseCollection
34
+ {
35
+ friend class TPCollCache;
36
+ friend class Mmsapt;
37
+ public:
38
+ TPCollCache::cache_t::iterator prev, next;
39
+ public:
40
+ mutable boost::shared_mutex lock;
41
+ size_t const revision; // rev. No. of the underlying corpus
42
+ uint64_t const key; // phrase key
43
+ TPCollWrapper(uint64_t const key, size_t const rev);
44
+ ~TPCollWrapper();
45
+ };
46
+
47
+ }
mosesdecoder/moses/TranslationModel/UG/bitext-find.cc ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include <boost/program_options.hpp>
2
+ #include "mm/ug_bitext.h"
3
+ #include <string>
4
+
5
+ using namespace std;
6
+ using namespace Moses;
7
+ using namespace sapt;
8
+
9
+ namespace po=boost::program_options;
10
+ typedef L2R_Token<SimpleWordId> Token;
11
+ typedef mmBitext<Token> mmbitext;
12
+ typedef Bitext<Token>::tsa tsa;
13
+
14
+ string bname, L1, L2, Q1, Q2;
15
+ size_t maxhits;
16
+ void interpret_args(int ac, char* av[]);
17
+
18
+
19
+ void
20
+ write_sentence
21
+ (Ttrack<Token> const& T, uint32_t const sid, TokenIndex const& V, ostream& out)
22
+ {
23
+ Token const* t = T.sntStart(sid);
24
+ Token const* e = T.sntEnd(sid);
25
+ // size_t i = 0;
26
+ while (t < e)
27
+ {
28
+ // out << i++ << ":";
29
+ out << V[t->id()];
30
+ if (++t < e) out << " ";
31
+ }
32
+ }
33
+
34
+ bool
35
+ fill(string const& query, TSA<Token> const& tsa,
36
+ TokenIndex const& V, bitvector& v)
37
+ {
38
+ v.resize(tsa.getCorpus()->size());
39
+ Bitext<Token>::iter m(&tsa);
40
+ istringstream buf(query); string w;
41
+ while (buf >> w)
42
+ if (!m.extend(V[w]))
43
+ return false;
44
+ m.markSentences(v);
45
+ return true;
46
+ }
47
+
48
+
49
+
50
+
51
+ int main(int argc, char* argv[])
52
+ {
53
+ interpret_args(argc, argv);
54
+ if (Q1.empty() && Q2.empty()) exit(0);
55
+
56
+ boost::shared_ptr<mmbitext> B(new mmbitext); string w;
57
+ B->open(bname, L1, L2);
58
+
59
+ Bitext<Token>::iter m1(B->I1.get(), *B->V1, Q1);
60
+ if (Q1.size() && m1.size() == 0) exit(0);
61
+
62
+ Bitext<Token>::iter m2(B->I2.get(), *B->V2, Q2);
63
+ if (Q2.size() && m2.size() == 0) exit(0);
64
+
65
+ bitvector check(B->T1->size());
66
+ if (Q1.size() == 0 || Q2.size() == 0) check.set();
67
+ else (m2.markSentences(check));
68
+
69
+ Bitext<Token>::iter& m = m1.size() ? m1 : m2;
70
+ char const* x = m.lower_bound(-1);
71
+ char const* stop = m.upper_bound(-1);
72
+ uint64_t sid;
73
+ ushort off;
74
+ boost::taus88 rnd;
75
+ size_t N = m.approxOccurrenceCount();
76
+ maxhits = min(N, maxhits);
77
+ size_t k = 0; // selected
78
+ for (size_t i = 0; x < stop; ++i)
79
+ {
80
+ x = m.root->readSid(x,stop,sid);
81
+ x = m.root->readOffset(x,stop,off);
82
+
83
+ if (!check[sid]) continue;
84
+ size_t r = (N - i) * rnd()/(rnd.max()+1.) + k;
85
+ if (maxhits != N && r >= maxhits) continue;
86
+ ++k;
87
+
88
+ size_t s1,s2,e1,e2; int po_fwd=-1,po_bwd=-1;
89
+ std::vector<unsigned char> caln;
90
+ // cout << sid << " " << B->docname(sid) << std::endl;
91
+ if (!B->find_trg_phr_bounds(sid, off, off+m.size(),
92
+ s1,s2,e1,e2,po_fwd,po_bwd,
93
+ &caln, NULL, &m == &m2))
94
+ {
95
+ // cout << "alignment failure" << std::endl;
96
+ }
97
+
98
+ std::cout << sid << " " << B->sid2docname(sid)
99
+ << " dfwd=" << po_fwd << " dbwd=" << po_bwd
100
+ << "\n";
101
+
102
+ write_sentence(*B->T1, sid, *B->V1, std::cout); std::cout << "\n";
103
+ write_sentence(*B->T2, sid, *B->V2, std::cout); std::cout << "\n";
104
+ B->write_yawat_alignment(sid,
105
+ m1.size() ? &m1 : NULL,
106
+ m2.size() ? &m2 : NULL, std::cout);
107
+ std::cout << std::endl;
108
+
109
+ }
110
+ }
111
+
112
+ void
113
+ interpret_args(int ac, char* av[])
114
+ {
115
+ po::variables_map vm;
116
+ po::options_description o("Options");
117
+ o.add_options()
118
+
119
+ ("help,h", "print this message")
120
+ ("maxhits,n", po::value<size_t>(&maxhits)->default_value(25),
121
+ "max. number of hits")
122
+ ("q1", po::value<string>(&Q1), "query in L1")
123
+ ("q2", po::value<string>(&Q2), "query in L2")
124
+ ;
125
+
126
+ po::options_description h("Hidden Options");
127
+ h.add_options()
128
+ ("bname", po::value<string>(&bname), "base name of corpus")
129
+ ("L1", po::value<string>(&L1), "L1 tag")
130
+ ("L2", po::value<string>(&L2), "L2 tag")
131
+ ;
132
+
133
+ h.add(o);
134
+ po::positional_options_description a;
135
+ a.add("bname",1);
136
+ a.add("L1",1);
137
+ a.add("L2",1);
138
+
139
+ po::store(po::command_line_parser(ac,av)
140
+ .options(h)
141
+ .positional(a)
142
+ .run(),vm);
143
+ po::notify(vm);
144
+ if (vm.count("help"))
145
+ {
146
+ std::cout << "\nusage:\n\t" << av[0]
147
+ << " [options] [--q1=<L1string>] [--q2=<L2string>]" << std::endl;
148
+ std::cout << o << std::endl;
149
+ exit(0);
150
+ }
151
+ }
mosesdecoder/moses/TranslationModel/UG/check-coverage.cc ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // #include "mmsapt.h"
2
+ // #include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
3
+ // #include "moses/TranslationTask.h"
4
+ #include <boost/foreach.hpp>
5
+ #include <boost/format.hpp>
6
+ #include <boost/tokenizer.hpp>
7
+ #include <boost/shared_ptr.hpp>
8
+ #include <algorithm>
9
+ #include <iostream>
10
+ #include "mm/ug_bitext.h"
11
+ #include "generic/file_io/ug_stream.h"
12
+ #include <string>
13
+ #include <sstream>
14
+
15
+ using namespace Moses;
16
+ using namespace sapt;
17
+ using namespace std;
18
+ using namespace boost;
19
+
20
+ typedef sapt::L2R_Token<sapt::SimpleWordId> Token;
21
+ typedef mmBitext<Token> bitext_t;
22
+
23
// Comparator for (name, count) pairs: larger counts sort first; the names
// are not compared.
struct mycmp
{
  bool operator() (std::pair<std::string,uint32_t> const& lhs,
                   std::pair<std::string,uint32_t> const& rhs) const
  {
    return rhs.second < lhs.second;
  }
};
31
+
32
// Return the file-name part of path (everything after the last '/'),
// with suffix removed from its end if the name ends in suffix.
//
// Handles paths without any '/' and suffixes longer than the path, both
// of which previously ran past the end of the string.  The result no
// longer starts with the '/' separator, and nothing is written to stdout.
std::string
basename(std::string const& path, std::string const& suffix)
{
  std::string::size_type const slash = path.find_last_of('/');
  std::string::size_type const start
    = (slash == std::string::npos) ? 0 : slash + 1;
  std::string::size_type end = path.size();

  // strip the suffix only if it fits into the file-name part and matches
  if (end - start >= suffix.size()
      && path.compare(end - suffix.size(), suffix.size(), suffix) == 0)
    end -= suffix.size();

  return path.substr(start, end - start);
}
41
+
42
+ int main(int argc, char* argv[])
43
+ {
44
+ boost::shared_ptr<bitext_t> B(new bitext_t);
45
+ B->open(argv[1],argv[2],argv[3]);
46
+ string line;
47
+ string ifile = argv[4];
48
+ string docname = basename(ifile, string(".") + argv[2] + ".gz");
49
+ boost::iostreams::filtering_istream in;
50
+ ugdiss::open_input_stream(ifile,in);
51
+ while(getline(in,line))
52
+ {
53
+ cout << line << " [" << docname << "]" << endl;
54
+ vector<id_type> snt;
55
+ B->V1->fillIdSeq(line,snt);
56
+ for (size_t i = 0; i < snt.size(); ++i)
57
+ {
58
+ bitext_t::iter m(B->I1.get());
59
+ for (size_t k = i; k < snt.size() && m.extend(snt[k]); ++k)
60
+ {
61
+ if (m.ca() > 500) continue;
62
+ sapt::tsa::ArrayEntry I(m.lower_bound(-1));
63
+ char const* stop = m.upper_bound(-1);
64
+ map<string,uint32_t> cnt;
65
+ while (I.next != stop)
66
+ {
67
+ m.root->readEntry(I.next,I);
68
+ ++cnt[B->sid2docname(I.sid)];
69
+ }
70
+ cout << setw(8) << int(m.ca()) << " "
71
+ << B->V1->toString(&snt[i],&snt[k+1]) << endl;
72
+ typedef pair<string,uint32_t> entry;
73
+ vector<entry> ranked; ranked.reserve(cnt.size());
74
+ BOOST_FOREACH(entry const& e, cnt) ranked.push_back(e);
75
+ sort(ranked.begin(),ranked.end(),mycmp());
76
+ BOOST_FOREACH(entry const& e, ranked)
77
+ cout << setw(12) << " " << e.second << " " << e.first << endl;
78
+ cout << endl;
79
+ }
80
+ }
81
+ }
82
+ }
mosesdecoder/moses/TranslationModel/UG/filter-pt.cc ADDED
@@ -0,0 +1,669 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
2
+ // significance filtering for phrase tables as described in
3
+ // H. Johnson, et al. (2007) Improving Translation Quality
4
+ // by Discarding Most of the Phrasetable. EMNLP 2007.
5
+ // Implemented by Marcin Junczys-Dowmunt
6
+ // recommended use: -l a+e -n <ttable-limit>
7
+ #include <cstring>
8
+ #include <cassert>
9
+ #include <cstdio>
10
+ #include <cstdlib>
11
+ #include <algorithm>
12
+ #include <fstream>
13
+ #include <sstream>
14
+
15
+ #include <vector>
16
+ #include <iostream>
17
+ #include <set>
18
+
19
+ #include <boost/thread/tss.hpp>
20
+ #include <boost/thread.hpp>
21
+ #include <boost/unordered_map.hpp>
22
+ #include <boost/program_options.hpp>
23
+ #include <boost/shared_ptr.hpp>
24
+ #include <boost/foreach.hpp>
25
+
26
+ #ifdef WIN32
27
+ #include "WIN32_functions.h"
28
+ #else
29
+ #include <unistd.h>
30
+ #endif
31
+
32
+ #include "mm/ug_bitext.h"
33
+
34
+ // constants
35
+ const size_t MINIMUM_SIZE_TO_KEEP = 10000; // increase this to improve memory usage,
36
+ // reduce for speed
37
+ const std::string SEPARATOR = " ||| ";
38
+
39
+ const double ALPHA_PLUS_EPS = -1000.0; // dummy value
40
+ const double ALPHA_MINUS_EPS = -2000.0; // dummy value
41
+
42
+ // configuration params
43
+ int pfe_filter_limit = 0; // 0 = don't filter anything based on P(f|e)
44
+ bool print_cooc_counts = false; // add cooc counts to phrase table?
45
+ bool print_neglog_significance = false; // add -log(p) to phrase table?
46
+ double sig_filter_limit = 0; // keep phrase pairs with -log(sig) > sig_filter_limit
47
+ // higher = filter-more
48
+ bool pef_filter_only = false; // only filter based on pef
49
+ bool hierarchical = false;
50
+
51
+ double p_111 = 0.0; // alpha
52
+ size_t pt_lines = 0;
53
+ size_t nremoved_sigfilter = 0;
54
+ size_t nremoved_pfefilter = 0;
55
+
56
+ typedef sapt::L2R_Token<sapt::SimpleWordId> Token;
57
+ typedef sapt::mmTtrack<Token> ttrack_t;
58
+ typedef sapt::mmTSA<Token> tsa_t;
59
+ typedef sapt::TokenIndex tind_t;
60
+
61
+ int num_lines;
62
+
63
+ boost::mutex in_mutex;
64
+ boost::mutex out_mutex;
65
+ boost::mutex err_mutex;
66
+
67
+ typedef size_t TextLenType;
68
+
69
+ typedef boost::shared_ptr<std::vector<TextLenType> > SentIdSet;
70
+
71
+ class Cache {
72
+ typedef std::pair<SentIdSet, clock_t> ClockedSet;
73
+ typedef boost::unordered_map<std::string, ClockedSet> ClockedMap;
74
+
75
+ public:
76
+
77
+ SentIdSet get(const std::string& phrase) {
78
+ boost::shared_lock<boost::shared_mutex> lock(m_mutex);
79
+ if(m_cont.count(phrase)) {
80
+ ClockedSet& set = m_cont[phrase];
81
+ set.second = clock();
82
+ return set.first;
83
+ }
84
+ return SentIdSet( new SentIdSet::element_type() );
85
+ }
86
+
87
+ void put(const std::string& phrase, const SentIdSet set) {
88
+ boost::unique_lock<boost::shared_mutex> lock(m_mutex);
89
+ m_cont[phrase] = std::make_pair(set, clock());
90
+ }
91
+
92
+ static void set_max_cache(size_t max_cache) {
93
+ s_max_cache = max_cache;
94
+ }
95
+
96
+ void prune() {
97
+ if(s_max_cache > 0) {
98
+ boost::upgrade_lock<boost::shared_mutex> lock(m_mutex);
99
+ if(m_cont.size() > s_max_cache) {
100
+ std::vector<clock_t> clocks;
101
+ for(ClockedMap::iterator it = m_cont.begin(); it != m_cont.end(); it++)
102
+ clocks.push_back(it->second.second);
103
+
104
+ std::sort(clocks.begin(), clocks.end());
105
+ clock_t out = clocks[m_cont.size() - s_max_cache];
106
+
107
+ boost::upgrade_to_unique_lock<boost::shared_mutex> uniq_lock(lock);
108
+ for(ClockedMap::iterator it = m_cont.begin(); it != m_cont.end(); it++)
109
+ if(it->second.second < out)
110
+ m_cont.erase(it);
111
+ }
112
+ }
113
+ }
114
+
115
+ private:
116
+ ClockedMap m_cont;
117
+ boost::shared_mutex m_mutex;
118
+ static size_t s_max_cache;
119
+ };
120
+
121
+ size_t Cache::s_max_cache = 0;
122
+
123
+ struct SA {
124
+ tind_t V;
125
+ boost::shared_ptr<ttrack_t> T;
126
+ tsa_t I;
127
+ Cache cache;
128
+ };
129
+
130
+ std::vector<boost::shared_ptr<SA> > e_sas;
131
+ std::vector<boost::shared_ptr<SA> > f_sas;
132
+
133
+ #undef min
134
+
135
// Prints a short description of the tool to stderr. Does not terminate
// the program; callers decide whether to exit afterwards.
void usage()
{
  static const char* const banner =
    "\nFilter phrase table using significance testing as described\n"
    "in H. Johnson, et al. (2007) Improving Translation Quality\n"
    "by Discarding Most of the Phrasetable. EMNLP 2007.\n";
  std::cerr << banner;
}
141
+
142
// One phrase table entry: the parsed fields of a phrase table line plus the
// co-occurrence statistics computed for it during filtering.
struct PTEntry {
  // Parses `str`; `index` selects which score of the score field becomes pfe.
  PTEntry(const std::string& str, int index);

  std::string f_phrase;   // source phrase
  std::string e_phrase;   // target phrase
  std::string extra;      // everything after the score field (may be empty)
  std::string scores;     // raw score field
  float pfe;              // score selected by the constructor's index
  int cf;                 // occurrence count of the source phrase
  int ce;                 // occurrence count of the target phrase
  int cfe;                // co-occurrence count
  float nlog_pte;         // -log(significance)

  // Records the co-occurrence counts and the -log(significance) value.
  void set_cooc_stats(int _cef, int _cf, int _ce, float nlp) {
    this->nlog_pte = nlp;
    this->ce = _ce;
    this->cf = _cf;
    this->cfe = _cef;
  }

};
161
+
162
+ PTEntry::PTEntry(const std::string& str, int index) :
163
+ cf(0), ce(0), cfe(0), nlog_pte(0.0)
164
+ {
165
+ size_t pos = 0;
166
+ std::string::size_type nextPos = str.find(SEPARATOR, pos);
167
+ this->f_phrase = str.substr(pos,nextPos);
168
+
169
+ pos = nextPos + SEPARATOR.size();
170
+ nextPos = str.find(SEPARATOR, pos);
171
+ this->e_phrase = str.substr(pos,nextPos-pos);
172
+
173
+ pos = nextPos + SEPARATOR.size();
174
+ nextPos = str.find(SEPARATOR, pos);
175
+ if (nextPos < str.size()) {
176
+ this->scores = str.substr(pos,nextPos-pos);
177
+
178
+ pos = nextPos + SEPARATOR.size();
179
+ this->extra = str.substr(pos);
180
+ }
181
+ else {
182
+ this->scores = str.substr(pos,str.size()-pos);
183
+ }
184
+
185
+ int c = 0;
186
+ std::string::iterator i=scores.begin();
187
+ if (index > 0) {
188
+ for (; i != scores.end(); ++i) {
189
+ if ((*i) == ' ') {
190
+ c++;
191
+ if (c == index) break;
192
+ }
193
+ }
194
+ }
195
+ if (i != scores.end()) {
196
+ ++i;
197
+ }
198
+ char f[24];
199
+ char *fp=f;
200
+ while (i != scores.end() && *i != ' ') {
201
+ *fp++=*i++;
202
+ }
203
+ *fp++=0;
204
+
205
+ this->pfe = atof(f);
206
+ }
207
+
208
+ struct PfeComparer {
209
+ bool operator()(const PTEntry* a, const PTEntry* b) const {
210
+ return a->pfe > b->pfe;
211
+ }
212
+ };
213
+
214
+ struct NlogSigThresholder {
215
+ NlogSigThresholder(float threshold) : t(threshold) {}
216
+ float t;
217
+ bool operator()(const PTEntry* a) const {
218
+ if (a->nlog_pte < t) {
219
+ delete a;
220
+ return true;
221
+ } else return false;
222
+ }
223
+ };
224
+
225
+ std::ostream& operator << (std::ostream& os, const PTEntry& pp)
226
+ {
227
+ os << pp.f_phrase << " ||| " << pp.e_phrase;
228
+ os << " ||| " << pp.scores;
229
+ if (pp.extra.size()>0) os << " ||| " << pp.extra;
230
+ if (print_cooc_counts) os << " ||| " << pp.cfe << " " << pp.cf << " " << pp.ce;
231
+ if (print_neglog_significance) os << " ||| " << pp.nlog_pte;
232
+ return os;
233
+ }
234
+
235
// Debug helper: dumps a 2x2 contingency table (a b / c d) to stderr together
// with the probability p and the factor b*c/((a+1)*(d+1)).
void print(int a, int b, int c, int d, float p)
{
  const double xf = static_cast<double>(b) * static_cast<double>(c)
                    / static_cast<double>(a + 1) / static_cast<double>(d + 1);

  std::cerr << a << "\t" << b << "\t P=" << p << "\n";
  std::cerr << c << "\t" << d << "\t xf=" << xf << "\n\n";
}
241
+
242
+ // 2x2 (one-sided) Fisher's exact test
243
+ // see B. Moore. (2004) On Log Likelihood and the Significance of Rare Events
244
+ double fisher_exact(int cfe, int ce, int cf)
245
+ {
246
+ assert(cfe <= ce);
247
+ assert(cfe <= cf);
248
+
249
+ int a = cfe;
250
+ int b = (cf - cfe);
251
+ int c = (ce - cfe);
252
+ int d = (num_lines - ce - cf + cfe);
253
+ int n = a + b + c + d;
254
+
255
+ double cp = exp(lgamma(1+a+c) + lgamma(1+b+d) + lgamma(1+a+b) + lgamma(1+c+d)
256
+ - lgamma(1+n) - lgamma(1+a) - lgamma(1+b) - lgamma(1+c)
257
+ - lgamma(1+d));
258
+ double total_p = 0.0;
259
+ int tc = std::min(b,c);
260
+ for (int i=0; i<=tc; i++) {
261
+ total_p += cp;
262
+ double coef = (double)(b)*(double)(c)/(double)(a+1)/(double)(d+1);
263
+ cp *= coef;
264
+ ++a;
265
+ --c;
266
+ ++d;
267
+ --b;
268
+ }
269
+ return total_p;
270
+ }
271
+
272
// Intersects two sorted id sequences, given as pointer-like handles, and
// inserts the common elements at the front of *out, in order.
template <class setType>
void ordered_set_intersect(setType& out, const setType set_1, const setType set_2)
{
  std::set_intersection(set_1->begin(), set_1->end(),
                        set_2->begin(), set_2->end(),
                        std::inserter(*out, out->begin()));
}
278
+
279
+
280
+ void lookup_phrase(SentIdSet& ids, const std::string& phrase,
281
+ tsa_t &my_sa, tind_t &my_v, Cache& cache)
282
+ {
283
+ ids = cache.get(phrase);
284
+ if(ids->empty()) {
285
+
286
+ std::vector<sapt::id_type> snt;
287
+ my_v.fillIdSeq(phrase, snt);
288
+
289
+ tsa_t::tree_iterator m(&my_sa);
290
+ size_t k = 0;
291
+ while (k < snt.size() && m.extend(snt[k])) ++k;
292
+ if(k == snt.size()) {
293
+ ids->reserve(m.approxOccurrenceCount()+10);
294
+ sapt::tsa::ArrayEntry I(m.lower_bound(-1));
295
+ char const* stop = m.upper_bound(-1);
296
+ do {
297
+ m.root->readEntry(I.next,I);
298
+ ids->push_back(I.sid);
299
+ } while (I.next != stop);
300
+
301
+ std::sort(ids->begin(), ids->end());
302
+ SentIdSet::element_type::iterator it =
303
+ std::unique(ids->begin(), ids->end());
304
+ ids->resize(it - ids->begin());
305
+
306
+ if(ids->size() >= MINIMUM_SIZE_TO_KEEP)
307
+ cache.put(phrase, ids);
308
+ }
309
+ }
310
+ }
311
+
312
+ void lookup_multiple_phrases(SentIdSet& ids, std::vector<std::string> & phrases,
313
+ tsa_t & my_sa, tind_t &my_v,
314
+ const std::string & rule, Cache& cache)
315
+ {
316
+
317
+ if (phrases.size() == 1) {
318
+ lookup_phrase(ids, phrases.front(), my_sa, my_v, cache);
319
+ }
320
+ else {
321
+ SentIdSet main_set( new SentIdSet::element_type() );
322
+ bool first = true;
323
+ SentIdSet first_set( new SentIdSet::element_type() );
324
+ lookup_phrase(first_set, phrases.front(), my_sa, my_v, cache);
325
+ for (std::vector<std::string>::iterator phrase=phrases.begin()+1;
326
+ phrase != phrases.end(); ++phrase) {
327
+ SentIdSet temp_set( new SentIdSet::element_type() );
328
+ lookup_phrase(temp_set, *phrase, my_sa, my_v, cache);
329
+ if (first) {
330
+ ordered_set_intersect(main_set, first_set, temp_set);
331
+ first = false;
332
+ }
333
+ else {
334
+ SentIdSet new_set( new SentIdSet::element_type() );
335
+ ordered_set_intersect(new_set, main_set, temp_set);
336
+ main_set->swap(*new_set);
337
+ }
338
+ }
339
+ ids->swap(*main_set);
340
+ }
341
+ }
342
+
343
+
344
+ void find_occurrences(SentIdSet& ids, const std::string& rule,
345
+ tsa_t& my_sa, tind_t &my_v, Cache& cache)
346
+ {
347
+ // we search for hierarchical rules by stripping away NT and looking for terminals sequences
348
+ // if a rule contains multiple sequences of terminals, we intersect their occurrences.
349
+ if (hierarchical) {
350
+ // std::cerr << "splitting up phrase: " << phrase << "\n";
351
+ int pos = 0;
352
+ int NTStartPos, NTEndPos;
353
+ std::vector<std::string> phrases;
354
+ while (rule.find("] ", pos) < rule.size()) {
355
+ NTStartPos = rule.find("[",pos) - 1; // -1 to cut space before NT
356
+ NTEndPos = rule.find("] ",pos);
357
+ if (NTStartPos < pos) { // no space: NT at start of rule (or two consecutive NTs)
358
+ pos = NTEndPos + 2;
359
+ continue;
360
+ }
361
+ phrases.push_back(rule.substr(pos,NTStartPos-pos));
362
+ pos = NTEndPos + 2;
363
+ }
364
+
365
+ NTStartPos = rule.find("[",pos) - 1; // LHS of rule
366
+ if (NTStartPos > pos) {
367
+ phrases.push_back(rule.substr(pos,NTStartPos-pos));
368
+ }
369
+
370
+ lookup_multiple_phrases(ids, phrases, my_sa, my_v, rule, cache);
371
+ }
372
+ else {
373
+ lookup_phrase(ids, rule, my_sa, my_v, cache);
374
+ }
375
+ }
376
+
377
+
378
+ // input: unordered list of translation options for a single source phrase
379
+ void compute_cooc_stats_and_filter(std::vector<PTEntry*>& options)
380
+ {
381
+ if (pfe_filter_limit > 0 && options.size() > pfe_filter_limit) {
382
+ nremoved_pfefilter += (options.size() - pfe_filter_limit);
383
+ std::nth_element(options.begin(), options.begin() + pfe_filter_limit,
384
+ options.end(), PfeComparer());
385
+ for (std::vector<PTEntry*>::iterator i = options.begin() + pfe_filter_limit;
386
+ i != options.end(); ++i)
387
+ delete *i;
388
+ options.erase(options.begin() + pfe_filter_limit,options.end());
389
+ }
390
+
391
+ if (pef_filter_only)
392
+ return;
393
+
394
+ if (options.empty())
395
+ return;
396
+
397
+ size_t cf = 0;
398
+ std::vector<SentIdSet> fsets;
399
+ BOOST_FOREACH(boost::shared_ptr<SA>& f_sa, f_sas) {
400
+ fsets.push_back( boost::shared_ptr<SentIdSet::element_type>(new SentIdSet::element_type()) );
401
+ find_occurrences(fsets.back(), options.front()->f_phrase, f_sa->I, f_sa->V, f_sa->cache);
402
+ cf += fsets.back()->size();
403
+ }
404
+
405
+ for (std::vector<PTEntry*>::iterator i = options.begin();
406
+ i != options.end(); ++i) {
407
+ const std::string& e_phrase = (*i)->e_phrase;
408
+
409
+ size_t ce = 0;
410
+ std::vector<SentIdSet> esets;
411
+ BOOST_FOREACH(boost::shared_ptr<SA>& e_sa, e_sas) {
412
+ esets.push_back( boost::shared_ptr<SentIdSet::element_type>(new SentIdSet::element_type()) );
413
+ find_occurrences(esets.back(), e_phrase, e_sa->I, e_sa->V, e_sa->cache);
414
+ ce += esets.back()->size();
415
+ }
416
+
417
+ size_t cef = 0;
418
+ for(size_t j = 0; j < fsets.size(); ++j) {
419
+ SentIdSet efset( new SentIdSet::element_type() );
420
+ ordered_set_intersect(efset, fsets[j], esets[j]);
421
+ cef += efset->size();
422
+ }
423
+
424
+ double nlp = -log(fisher_exact(cef, cf, ce));
425
+ (*i)->set_cooc_stats(cef, cf, ce, nlp);
426
+ }
427
+
428
+ std::vector<PTEntry*>::iterator new_end =
429
+ std::remove_if(options.begin(), options.end(),
430
+ NlogSigThresholder(sig_filter_limit));
431
+ nremoved_sigfilter += (options.end() - new_end);
432
+ options.erase(new_end,options.end());
433
+ }
434
+
435
+ void filter_thread(std::istream* in, std::ostream* out, int pfe_index) {
436
+
437
+ std::vector<std::string> lines;
438
+ std::string prev = "";
439
+ std::vector<PTEntry*> options;
440
+ while(true) {
441
+ {
442
+ boost::mutex::scoped_lock lock(in_mutex);
443
+ if(in->eof())
444
+ break;
445
+
446
+ lines.clear();
447
+ std::string line;
448
+ while(getline(*in, line) && lines.size() < 500000)
449
+ lines.push_back(line);
450
+ }
451
+
452
+ std::stringstream out_temp;
453
+ for(std::vector<std::string>::iterator it = lines.begin(); it != lines.end(); it++) {
454
+ size_t tmp_lines = ++pt_lines;
455
+ if(tmp_lines % 10000 == 0) {
456
+ boost::mutex::scoped_lock lock(err_mutex);
457
+ std::cerr << ".";
458
+
459
+ if(tmp_lines % 500000 == 0)
460
+ std::cerr << "[n:" << tmp_lines << "]\n";
461
+
462
+ if(tmp_lines % 10000000 == 0) {
463
+ float pfefper = (100.0*(float)nremoved_pfefilter)/(float)pt_lines;
464
+ float sigfper = (100.0*(float)nremoved_sigfilter)/(float)pt_lines;
465
+ std::cerr << "------------------------------------------------------\n"
466
+ << " unfiltered phrases pairs: " << pt_lines << "\n"
467
+ << "\n"
468
+ << " P(f|e) filter [first]: " << nremoved_pfefilter << " (" << pfefper << "%)\n"
469
+ << " significance filter: " << nremoved_sigfilter << " (" << sigfper << "%)\n"
470
+ << " TOTAL FILTERED: " << (nremoved_pfefilter + nremoved_sigfilter) << " (" << (sigfper + pfefper) << "%)\n"
471
+ << "\n"
472
+ << " FILTERED phrase pairs: " << (pt_lines - nremoved_pfefilter - nremoved_sigfilter) << " (" << (100.0-sigfper - pfefper) << "%)\n"
473
+ << "------------------------------------------------------\n";
474
+ }
475
+ }
476
+
477
+ if(pt_lines % 10000 == 0) {
478
+ BOOST_FOREACH(boost::shared_ptr<SA> f_sa, f_sas)
479
+ f_sa->cache.prune();
480
+ BOOST_FOREACH(boost::shared_ptr<SA> e_sa, e_sas)
481
+ e_sa->cache.prune();
482
+ }
483
+
484
+ if(it->length() > 0) {
485
+ PTEntry* pp = new PTEntry(it->c_str(), pfe_index);
486
+ if (prev != pp->f_phrase) {
487
+ prev = pp->f_phrase;
488
+
489
+ if (!options.empty()) { // always true after first line
490
+ compute_cooc_stats_and_filter(options);
491
+ }
492
+
493
+ for (std::vector<PTEntry*>::iterator i = options.begin();
494
+ i != options.end(); ++i) {
495
+ out_temp << **i << '\n';
496
+ delete *i;
497
+ }
498
+
499
+ options.clear();
500
+ options.push_back(pp);
501
+
502
+ } else {
503
+ options.push_back(pp);
504
+ }
505
+ }
506
+ }
507
+ boost::mutex::scoped_lock lock(out_mutex);
508
+ *out << out_temp.str() << std::flush;
509
+ }
510
+ compute_cooc_stats_and_filter(options);
511
+
512
+ boost::mutex::scoped_lock lock(out_mutex);
513
+ for (std::vector<PTEntry*>::iterator i = options.begin();
514
+ i != options.end(); ++i) {
515
+ *out << **i << '\n';
516
+ delete *i;
517
+ }
518
+ *out << std::flush;
519
+ }
520
+
521
+ namespace po = boost::program_options;
522
+
523
+ int main(int argc, char * argv[])
524
+ {
525
+ bool help;
526
+ std::vector<std::string> efiles;
527
+ std::vector<std::string> ffiles;
528
+ int pfe_index = 2;
529
+ int threads = 1;
530
+ size_t max_cache = 0;
531
+ std::string str_sig_filter_limit;
532
+
533
+ po::options_description general("General options");
534
+ general.add_options()
535
+ ("english,e", po::value<std::vector<std::string> >(&efiles)->multitoken(),
536
+ "english.suf-arr")
537
+ ("french,f", po::value<std::vector<std::string> >(&ffiles)->multitoken(),
538
+ "french.suf-arr")
539
+ ("pfe-index,i", po::value(&pfe_index)->default_value(2),
540
+ "Index of P(f|e) in phrase table")
541
+ ("pfe-filter-limit,n", po::value(&pfe_filter_limit)->default_value(0),
542
+ "0, 1...: 0=no filtering, >0 sort by P(e|f) and keep the top num elements")
543
+ ("threads,t", po::value(&threads)->default_value(1),
544
+ "number of threads to use")
545
+ ("max-cache,m", po::value(&max_cache)->default_value(0),
546
+ "limit cache to arg most recent phrases")
547
+ ("print-cooc,c", po::value(&print_cooc_counts)->zero_tokens()->default_value(false),
548
+ "add the coocurrence counts to the phrase table")
549
+ ("print-significance,p", po::value(&print_neglog_significance)->zero_tokens()->default_value(false),
550
+ "add -log(significance) to the phrase table")
551
+ ("hierarchical,x", po::value(&hierarchical)->zero_tokens()->default_value(false),
552
+ "filter hierarchical rule table")
553
+ ("sig-filter-limit,l", po::value(&str_sig_filter_limit),
554
+ ">0.0, a+e, or a-e: keep values that have a -log significance > this")
555
+ ("help,h", po::value(&help)->zero_tokens()->default_value(false),
556
+ "display this message")
557
+ ;
558
+
559
+ po::options_description cmdline_options("Allowed options");
560
+ cmdline_options.add(general);
561
+ po::variables_map vm;
562
+
563
+ try {
564
+ po::store(po::command_line_parser(argc,argv).
565
+ options(cmdline_options).run(), vm);
566
+ po::notify(vm);
567
+ }
568
+ catch (std::exception& e) {
569
+ std::cout << "Error: " << e.what() << std::endl << std::endl;
570
+
571
+ usage();
572
+ std::cout << cmdline_options << std::endl;
573
+ exit(0);
574
+ }
575
+
576
+ if(vm["help"].as<bool>()) {
577
+ usage();
578
+ std::cout << cmdline_options << std::endl;
579
+ exit(0);
580
+ }
581
+
582
+ if(vm.count("pfe-filter-limit"))
583
+ std::cerr << "P(f|e) filter limit: " << pfe_filter_limit << std::endl;
584
+ if(vm.count("threads"))
585
+ std::cerr << "Using threads: " << threads << std::endl;
586
+ if(vm.count("max-cache"))
587
+ std::cerr << "Using max phrases in caches: " << max_cache << std::endl;
588
+
589
+ if (strcmp(str_sig_filter_limit.c_str(),"a+e") == 0) {
590
+ sig_filter_limit = ALPHA_PLUS_EPS;
591
+ } else if (strcmp(str_sig_filter_limit.c_str(),"a-e") == 0) {
592
+ sig_filter_limit = ALPHA_MINUS_EPS;
593
+ } else {
594
+ char *x;
595
+ sig_filter_limit = strtod(str_sig_filter_limit.c_str(), &x);
596
+ if (sig_filter_limit < 0.0) {
597
+ std::cerr << "Filter limit (-l) must be either 'a+e', 'a-e' or a real number >= 0.0\n";
598
+ usage();
599
+ }
600
+ }
601
+
602
+ if (sig_filter_limit == 0.0) pef_filter_only = true;
603
+ //-----------------------------------------------------------------------------
604
+ if (optind != argc || ((efiles.empty() || ffiles.empty()) && !pef_filter_only)) {
605
+ usage();
606
+ }
607
+
608
+ if (!pef_filter_only) {
609
+ size_t elines = 0;
610
+ BOOST_FOREACH(std::string& efile, efiles) {
611
+ e_sas.push_back(boost::shared_ptr<SA>(new SA()));
612
+ e_sas.back()->V.open(efile + ".tdx");
613
+ e_sas.back()->T.reset(new ttrack_t());
614
+ e_sas.back()->T->open(efile + ".mct");
615
+ e_sas.back()->I.open(efile + ".sfa", e_sas.back()->T);
616
+ elines += e_sas.back()->T->size();
617
+ }
618
+
619
+ size_t flines = 0;
620
+ BOOST_FOREACH(std::string& ffile, ffiles) {
621
+ f_sas.push_back(boost::shared_ptr<SA>(new SA()));
622
+ f_sas.back()->V.open(ffile + ".tdx");
623
+ f_sas.back()->T.reset(new ttrack_t());
624
+ f_sas.back()->T->open(ffile + ".mct");
625
+ f_sas.back()->I.open(ffile + ".sfa", f_sas.back()->T);
626
+ flines += f_sas.back()->T->size();
627
+ }
628
+
629
+ if (elines != flines) {
630
+ std::cerr << "Number of lines in e-corpus != number of lines in f-corpus!\n";
631
+ usage();
632
+ exit(1);
633
+ } else {
634
+ std::cerr << "Training corpus: " << elines << " lines\n";
635
+ num_lines = elines;
636
+ }
637
+ p_111 = -log(fisher_exact(1,1,1));
638
+ std::cerr << "\\alpha = " << p_111 << "\n";
639
+ if (sig_filter_limit == ALPHA_MINUS_EPS) {
640
+ sig_filter_limit = p_111 - 0.001;
641
+ } else if (sig_filter_limit == ALPHA_PLUS_EPS) {
642
+ sig_filter_limit = p_111 + 0.001;
643
+ }
644
+ std::cerr << "Sig filter threshold is = " << sig_filter_limit << "\n";
645
+ } else {
646
+ std::cerr << "Filtering using P(e|f) only. n=" << pfe_filter_limit << std::endl;
647
+ }
648
+
649
+ Cache::set_max_cache(max_cache);
650
+ std::ios_base::sync_with_stdio(false);
651
+
652
+ boost::thread_group threadGroup;
653
+ for(int i = 0; i < threads; i++)
654
+ threadGroup.add_thread(new boost::thread(filter_thread, &std::cin, &std::cout, pfe_index));
655
+ threadGroup.join_all();
656
+
657
+ float pfefper = (100.0*(float)nremoved_pfefilter)/(float)pt_lines;
658
+ float sigfper = (100.0*(float)nremoved_sigfilter)/(float)pt_lines;
659
+
660
+ std::cerr << "\n\n------------------------------------------------------\n"
661
+ << " unfiltered phrases pairs: " << pt_lines << "\n"
662
+ << "\n"
663
+ << " P(f|e) filter [first]: " << nremoved_pfefilter << " (" << pfefper << "%)\n"
664
+ << " significance filter: " << nremoved_sigfilter << " (" << sigfper << "%)\n"
665
+ << " TOTAL FILTERED: " << (nremoved_pfefilter + nremoved_sigfilter) << " (" << (sigfper + pfefper) << "%)\n"
666
+ << "\n"
667
+ << " FILTERED phrase pairs: " << (pt_lines - nremoved_pfefilter - nremoved_sigfilter) << " (" << (100.0-sigfper - pfefper) << "%)\n"
668
+ << "------------------------------------------------------\n";
669
+ }
mosesdecoder/moses/TranslationModel/UG/ptable-describe-features.cc ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
2
+ #include "mmsapt.h"
3
+ #include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
4
+ #include <boost/foreach.hpp>
5
+ #include <boost/format.hpp>
6
+ #include <boost/tokenizer.hpp>
7
+ #include <boost/shared_ptr.hpp>
8
+ #include <algorithm>
9
+ #include <iostream>
10
+ #include "moses/parameters/AllOptions.h"
11
+
12
+ using namespace Moses;
13
+ using namespace sapt;
14
+ using namespace std;
15
+ using namespace boost;
16
+
17
+ int main()
18
+ {
19
+ string line;
20
+ while(getline(cin,line))
21
+ {
22
+ if (line.empty()) continue;
23
+ size_t k = line.find_first_not_of(" ");
24
+ if (line.find("Mmsapt") != k &&
25
+ line.find("PhraseDictionaryBitextSampling") != k)
26
+ continue;
27
+ AllOptions::ptr opts(new AllOptions);
28
+ Mmsapt PT(line);
29
+ PT.Load(opts, false);
30
+ cout << PT.GetName() << ":" << endl;
31
+ vector<string> const& fnames = PT.GetFeatureNames();
32
+ BOOST_FOREACH(string const& s, fnames)
33
+ cout << s << endl;
34
+ cout << endl;
35
+ }
36
+ exit(0);
37
+ }
38
+
39
+
40
+
mosesdecoder/moses/TranslationModel/UG/sapt_pscore_cumulative_bias.h ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // -*- c++ -*-
2
+ // Phrase scorer that records the aggregated bias score
3
+ //
4
+
5
+ #include "util/exception.hh"
6
+ #include "sapt_pscore_base.h"
7
+ #include <boost/dynamic_bitset.hpp>
8
+ #include <cstdio>
9
+
10
+ namespace sapt {
11
+
12
+ template<typename Token>
13
+ class
14
+ PScoreCumBias : public PhraseScorer<Token>
15
+ {
16
+ float m_floor;
17
+ public:
18
+ PScoreCumBias(std::string const spec)
19
+ {
20
+ this->m_index = -1;
21
+ this->m_feature_names.push_back("cumb");
22
+ this->m_num_feats = this->m_feature_names.size();
23
+ this->m_floor = std::atof(spec.c_str());
24
+ }
25
+
26
+ bool
27
+ isIntegerValued(int i) const { return false; }
28
+
29
+ void
30
+ operator()(Bitext<Token> const& bt,
31
+ PhrasePair<Token>& pp,
32
+ std::vector<float> * dest = NULL) const
33
+ {
34
+ if (!dest) dest = &pp.fvals;
35
+ (*dest)[this->m_index] = log(std::max(m_floor,pp.cum_bias));
36
+ }
37
+ };
38
+ } // namespace sapt
39
+
mosesdecoder/moses/TranslationModel/UG/sapt_pscore_logcnt.h ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // -*- c++ -*-
2
+ // Phrase scorer that reports the natural logarithm of phrase pair counts
3
+ // in a bitext; the specification string selects which of the counts
4
+ // r1, s1, g1, j and r2 are reported as features
5
+ // written by Ulrich Germann
6
+
7
+ #include "sapt_pscore_base.h"
8
+ #include <boost/dynamic_bitset.hpp>
9
+
10
+ namespace sapt {
11
+
12
+ template<typename Token>
13
+ class
14
+ PScoreLogCnt : public PhraseScorer<Token>
15
+ {
16
+ std::string m_specs;
17
+ public:
18
+ PScoreLogCnt(std::string const specs)
19
+ {
20
+ this->m_index = -1;
21
+ this->m_specs = specs;
22
+ if (specs.find("r1") != std::string::npos) // raw source phrase counts
23
+ this->m_feature_names.push_back("log-r1");
24
+ if (specs.find("s1") != std::string::npos)
25
+ this->m_feature_names.push_back("log-s1"); // L1 sample size
26
+ if (specs.find("g1") != std::string::npos) // coherent phrases
27
+ this->m_feature_names.push_back("log-g1");
28
+ if (specs.find("j") != std::string::npos) // joint counts
29
+ this->m_feature_names.push_back("log-j");
30
+ if (specs.find("r2") != std::string::npos) // raw target phrase counts
31
+ this->m_feature_names.push_back("log-r2");
32
+ this->m_num_feats = this->m_feature_names.size();
33
+ }
34
+
35
+ bool
36
+ isIntegerValued(int i) const { return true; }
37
+
38
+ void
39
+ operator()(Bitext<Token> const& bt,
40
+ PhrasePair<Token>& pp,
41
+ std::vector<float> * dest = NULL) const
42
+ {
43
+ if (!dest) dest = &pp.fvals;
44
+ assert(pp.raw1);
45
+ assert(pp.sample1);
46
+ assert(pp.good1);
47
+ assert(pp.joint);
48
+ assert(pp.raw2);
49
+ size_t i = this->m_index;
50
+ if (m_specs.find("r1") != std::string::npos)
51
+ (*dest)[i++] = log(pp.raw1);
52
+ if (m_specs.find("s1") != std::string::npos)
53
+ (*dest)[i++] = log(pp.sample1);
54
+ if (m_specs.find("g1") != std::string::npos)
55
+ (*dest)[i++] = log(pp.good1);
56
+ if (m_specs.find("j") != std::string::npos)
57
+ (*dest)[i++] = log(pp.joint);
58
+ if (m_specs.find("r2") != std::string::npos)
59
+ (*dest)[i] = log(pp.raw2);
60
+ }
61
+ };
62
+ } // namespace sapt
mosesdecoder/moses/TranslationModel/UG/sapt_pscore_rareness.h ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // -*- c++ -*-
2
+ // Phrase scorer that rewards the number of phrase pair occurrences in a bitext
3
+ // with the asymptotic function x/(j+x) where x > 0 is a function
4
+ // parameter that determines the steepness of the rewards curve
5
+ // written by Ulrich Germann
6
+
7
+ #include "sapt_pscore_base.h"
8
+ #include <boost/dynamic_bitset.hpp>
9
+
10
+ namespace sapt {
11
+
12
+ // rareness penalty: x/(n+x)
13
+ template<typename Token>
14
+ class
15
+ PScoreRareness : public SingleRealValuedParameterPhraseScorerFamily<Token>
16
+ {
17
+ public:
18
+ PScoreRareness(std::string const spec)
19
+ {
20
+ this->m_tag = "rare";
21
+ this->init(spec);
22
+ }
23
+
24
+ bool
25
+ isLogVal(int i) const { return false; }
26
+
27
+ void
28
+ operator()(Bitext<Token> const& bt,
29
+ PhrasePair<Token>& pp,
30
+ std::vector<float> * dest = NULL) const
31
+ {
32
+ if (!dest) dest = &pp.fvals;
33
+ size_t i = this->m_index;
34
+ BOOST_FOREACH(float const x, this->m_x)
35
+ (*dest).at(i++) = x/(x + pp.joint);
36
+ }
37
+ };
38
+ } // namespace sapt
mosesdecoder/moses/TranslationModel/UG/sapt_pscore_wordcount.h ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // -*- c++ -*-
2
+ // written by Ulrich Germann
3
+ #pragma once
4
+ #include "moses/TranslationModel/UG/mm/ug_bitext.h"
5
+ #include "util/exception.hh"
6
+ #include "boost/format.hpp"
7
+ #include "sapt_pscore_base.h"
8
+
9
+ namespace sapt
10
+ {
11
+ template<typename Token>
12
+ class
13
+ PScoreWC : public PhraseScorer<Token>
14
+ {
15
+ public:
16
+ PScoreWC(std::string const dummy)
17
+ {
18
+ this->m_index = -1;
19
+ this->m_num_feats = 1;
20
+ this->m_feature_names.push_back(std::string("wordcount"));
21
+ }
22
+
23
+ void
24
+ operator()(Bitext<Token> const& bt,
25
+ PhrasePair<Token>& pp,
26
+ std::vector<float> * dest = NULL) const
27
+ {
28
+ if (!dest) dest = &pp.fvals;
29
+ (*dest)[this->m_index] = pp.len2;
30
+ }
31
+ };
32
+ }
33
+