Upload 416 files
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +6 -0
- tools/giza-pp/GIZA++-v2/ATables.cpp +119 -0
- tools/giza-pp/GIZA++-v2/ATables.h +172 -0
- tools/giza-pp/GIZA++-v2/AlignTables.cpp +44 -0
- tools/giza-pp/GIZA++-v2/AlignTables.h +118 -0
- tools/giza-pp/GIZA++-v2/Array.h +5 -0
- tools/giza-pp/GIZA++-v2/Array2.h +107 -0
- tools/giza-pp/GIZA++-v2/Array4.h +78 -0
- tools/giza-pp/GIZA++-v2/D4Tables.h +460 -0
- tools/giza-pp/GIZA++-v2/D5Tables.h +235 -0
- tools/giza-pp/GIZA++-v2/Dictionary.cpp +94 -0
- tools/giza-pp/GIZA++-v2/Dictionary.h +48 -0
- tools/giza-pp/GIZA++-v2/FlexArray.h +58 -0
- tools/giza-pp/GIZA++-v2/ForwardBackward.cpp +242 -0
- tools/giza-pp/GIZA++-v2/ForwardBackward.h +62 -0
- tools/giza-pp/GIZA++-v2/GIZA++ +3 -0
- tools/giza-pp/GIZA++-v2/GNU.GPL +282 -0
- tools/giza-pp/GIZA++-v2/Globals.h +73 -0
- tools/giza-pp/GIZA++-v2/HMMTables.cpp +177 -0
- tools/giza-pp/GIZA++-v2/HMMTables.h +172 -0
- tools/giza-pp/GIZA++-v2/LICENSE +282 -0
- tools/giza-pp/GIZA++-v2/Makefile +140 -0
- tools/giza-pp/GIZA++-v2/Makefile.definitions +0 -0
- tools/giza-pp/GIZA++-v2/Makefile.src +2 -0
- tools/giza-pp/GIZA++-v2/MoveSwapMatrix.cpp +231 -0
- tools/giza-pp/GIZA++-v2/MoveSwapMatrix.h +116 -0
- tools/giza-pp/GIZA++-v2/NTables.cpp +93 -0
- tools/giza-pp/GIZA++-v2/NTables.h +145 -0
- tools/giza-pp/GIZA++-v2/Parameter.cpp +144 -0
- tools/giza-pp/GIZA++-v2/Parameter.h +200 -0
- tools/giza-pp/GIZA++-v2/Perplexity.cpp +40 -0
- tools/giza-pp/GIZA++-v2/Perplexity.h +108 -0
- tools/giza-pp/GIZA++-v2/Pointer.h +175 -0
- tools/giza-pp/GIZA++-v2/README +508 -0
- tools/giza-pp/GIZA++-v2/TTables.cpp +323 -0
- tools/giza-pp/GIZA++-v2/TTables.h +417 -0
- tools/giza-pp/GIZA++-v2/Vector.h +427 -0
- tools/giza-pp/GIZA++-v2/WordClasses.h +96 -0
- tools/giza-pp/GIZA++-v2/alignment.cpp +38 -0
- tools/giza-pp/GIZA++-v2/alignment.h +227 -0
- tools/giza-pp/GIZA++-v2/collCounts.cpp +293 -0
- tools/giza-pp/GIZA++-v2/collCounts.h +80 -0
- tools/giza-pp/GIZA++-v2/defs.h +78 -0
- tools/giza-pp/GIZA++-v2/dependencies +635 -0
- tools/giza-pp/GIZA++-v2/file_spec.h +60 -0
- tools/giza-pp/GIZA++-v2/getSentence.cpp +340 -0
- tools/giza-pp/GIZA++-v2/getSentence.h +123 -0
- tools/giza-pp/GIZA++-v2/hmm.cpp +405 -0
- tools/giza-pp/GIZA++-v2/hmm.h +82 -0
- tools/giza-pp/GIZA++-v2/logprob.cpp +154 -0
.gitattributes
CHANGED
|
@@ -270,3 +270,9 @@ tools/mgiza/mgizapp/inst/hmmnorm filter=lfs diff=lfs merge=lfs -text
|
|
| 270 |
tools/mgiza/mgizapp/inst/lib/libmgiza.a filter=lfs diff=lfs merge=lfs -text
|
| 271 |
tools/mgiza/mgizapp/inst/mgiza filter=lfs diff=lfs merge=lfs -text
|
| 272 |
tools/mgiza/mgizapp/lib/libmgiza.a filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 270 |
tools/mgiza/mgizapp/inst/lib/libmgiza.a filter=lfs diff=lfs merge=lfs -text
|
| 271 |
tools/mgiza/mgizapp/inst/mgiza filter=lfs diff=lfs merge=lfs -text
|
| 272 |
tools/mgiza/mgizapp/lib/libmgiza.a filter=lfs diff=lfs merge=lfs -text
|
| 273 |
+
tools/giza-pp/GIZA++-v2/GIZA++ filter=lfs diff=lfs merge=lfs -text
|
| 274 |
+
tools/giza/d4norm filter=lfs diff=lfs merge=lfs -text
|
| 275 |
+
tools/giza/GIZA++ filter=lfs diff=lfs merge=lfs -text
|
| 276 |
+
tools/giza/GIZA++-v2/GIZA++ filter=lfs diff=lfs merge=lfs -text
|
| 277 |
+
tools/giza/hmmnorm filter=lfs diff=lfs merge=lfs -text
|
| 278 |
+
tools/giza/mgiza filter=lfs diff=lfs merge=lfs -text
|
tools/giza-pp/GIZA++-v2/ATables.cpp
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
EGYPT Toolkit for Statistical Machine Translation
|
| 4 |
+
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
| 5 |
+
|
| 6 |
+
This program is free software; you can redistribute it and/or
|
| 7 |
+
modify it under the terms of the GNU General Public License
|
| 8 |
+
as published by the Free Software Foundation; either version 2
|
| 9 |
+
of the License, or (at your option) any later version.
|
| 10 |
+
|
| 11 |
+
This program is distributed in the hope that it will be useful,
|
| 12 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 13 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 14 |
+
GNU General Public License for more details.
|
| 15 |
+
|
| 16 |
+
You should have received a copy of the GNU General Public License
|
| 17 |
+
along with this program; if not, write to the Free Software
|
| 18 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 19 |
+
USA.
|
| 20 |
+
|
| 21 |
+
*/
|
| 22 |
+
#include "ATables.h"
|
| 23 |
+
#include "Globals.h"
|
| 24 |
+
#include "myassert.h"
|
| 25 |
+
#include "Parameter.h"
|
| 26 |
+
|
| 27 |
+
// Defines the global flag CompactADTable that amodel consults when indexing its
// table: when set, one sentence-length index is pinned to MaxSentLength instead
// of l+1 / m+1 (see amodel::get in ATables.h).
// NOTE(review): GLOBAL_PARAMETER is defined elsewhere; the argument roles
// (type, variable, option name, help text, level, default) are presumed from
// this call site -- confirm against Parameter.h.
GLOBAL_PARAMETER(bool,CompactADTable,"compactadtable","1: only 3-dimensional alignment table for IBM-2 and IBM-3",PARLEV_MODELS,1);
|
| 28 |
+
// Defines the global interpolation weight amodel_smooth_factor used by
// amodel::getValue and amodel::normalize to mix table values with a constant
// (factor/(l+1) for a tables, factor/m for d tables).
// NOTE(review): argument roles of GLOBAL_PARAMETER are presumed from this call
// site -- confirm against Parameter.h.
GLOBAL_PARAMETER(float,amodel_smooth_factor,"model23SmoothFactor","smoothing parameter for IBM-2/3 (interpolation with constant)",PARLEV_SMOOTH,0.0);
|
| 29 |
+
|
| 30 |
+
template <class VALTYPE>
|
| 31 |
+
void amodel<VALTYPE>::printTable(const char *filename) const
|
| 32 |
+
// print amodel to file with the name filename (it'll be created or overwritten
|
| 33 |
+
// format : for a table :
|
| 34 |
+
// aj j l m val
|
| 35 |
+
// where aj is source word pos, j target word pos, l source sentence length,
|
| 36 |
+
// m is target sentence length.
|
| 37 |
+
//
|
| 38 |
+
{
|
| 39 |
+
//return;
|
| 40 |
+
if (is_distortion)
|
| 41 |
+
cout << "Dumping distortion table (d) to file:" << filename <<'\n';
|
| 42 |
+
else
|
| 43 |
+
cout << "Dumping alignment table (a) to file:" << filename <<'\n';
|
| 44 |
+
|
| 45 |
+
ofstream of(filename);
|
| 46 |
+
double ssum=0.0;
|
| 47 |
+
for(WordIndex l=0; l < MaxSentLength; l++)
|
| 48 |
+
for(WordIndex m=0;m<MaxSentLength;m++)
|
| 49 |
+
{
|
| 50 |
+
if( CompactADTable && l!=m )
|
| 51 |
+
continue;
|
| 52 |
+
unsigned int L=((CompactADTable&&is_distortion)?MaxSentLength:(l+1))-1;
|
| 53 |
+
unsigned int M=((CompactADTable&&!is_distortion)?MaxSentLength:(m+1))-1;
|
| 54 |
+
if( is_distortion==0 )
|
| 55 |
+
for(WordIndex j=1;j<=M; j++)
|
| 56 |
+
{
|
| 57 |
+
double sum=0.0;
|
| 58 |
+
for(WordIndex i=0;i<=L; i++)
|
| 59 |
+
{
|
| 60 |
+
VALTYPE x=getValue(i, j, L, M);
|
| 61 |
+
if( x>PROB_SMOOTH )
|
| 62 |
+
{
|
| 63 |
+
of << i << ' ' << j << ' ' << L << ' ' << M << ' ' << x << '\n';
|
| 64 |
+
sum+=x;
|
| 65 |
+
}
|
| 66 |
+
}
|
| 67 |
+
ssum+=sum;
|
| 68 |
+
}
|
| 69 |
+
else
|
| 70 |
+
for(WordIndex i=0;i<=L;i++)
|
| 71 |
+
{
|
| 72 |
+
double sum=0.0;
|
| 73 |
+
for(WordIndex j=1;j<=M;j++)
|
| 74 |
+
|
| 75 |
+
{
|
| 76 |
+
VALTYPE x=getValue(j, i, L, M);
|
| 77 |
+
if( x>PROB_SMOOTH )
|
| 78 |
+
{
|
| 79 |
+
of << j << ' ' << i << ' ' << L << ' ' << M << ' ' << x << '\n';
|
| 80 |
+
sum+=x;
|
| 81 |
+
}
|
| 82 |
+
}
|
| 83 |
+
ssum+=sum;
|
| 84 |
+
}
|
| 85 |
+
}
|
| 86 |
+
}
|
| 87 |
+
|
| 88 |
+
extern short NoEmptyWord;
|
| 89 |
+
|
| 90 |
+
template <class VALTYPE>
|
| 91 |
+
void amodel<VALTYPE>::readTable(const char *filename)
|
| 92 |
+
{
|
| 93 |
+
/* This function reads the a table from a file.
|
| 94 |
+
Each line is of the format: aj j l m val
|
| 95 |
+
where aj is the source word position, j the target word position,
|
| 96 |
+
l the source sentence length, and m the target sentence length
|
| 97 |
+
|
| 98 |
+
This function also works for a d table, where the positions
|
| 99 |
+
of aj and i are swapped. Both the a and d tables are 4 dimensional
|
| 100 |
+
hashes; this function will simply read in the four values and keep
|
| 101 |
+
them in that order when hashing the fifth value.
|
| 102 |
+
NAS, 7/11/99
|
| 103 |
+
*/
|
| 104 |
+
ifstream inf(filename);
|
| 105 |
+
cout << "Reading a/d table from " << filename << "\n";
|
| 106 |
+
if(!inf){
|
| 107 |
+
cerr << "\nERROR: Cannot open " << filename<<"\n";
|
| 108 |
+
return;
|
| 109 |
+
}
|
| 110 |
+
WordIndex w, x, l, m;
|
| 111 |
+
VALTYPE prob;
|
| 112 |
+
while(inf >> w >> x >> l >> m >> prob )
|
| 113 |
+
// the NULL word is added to the length
|
| 114 |
+
// of the sentence in the tables, but discount it when you write the tables.
|
| 115 |
+
setValue(w, x, l, m, prob);
|
| 116 |
+
}
|
| 117 |
+
|
| 118 |
+
// Explicit instantiation. printTable() and readTable() are defined in this .cpp
// file rather than in ATables.h, so this line is what emits their code for
// amodel<COUNT>. Other instantiations (such as the commented-out amodel<PROB>
// below) get no definition of those two members from this file.
template class amodel<COUNT> ;
|
| 119 |
+
//template class amodel<PROB> ;
|
tools/giza-pp/GIZA++-v2/ATables.h
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
EGYPT Toolkit for Statistical Machine Translation
|
| 4 |
+
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
| 5 |
+
|
| 6 |
+
This program is free software; you can redistribute it and/or
|
| 7 |
+
modify it under the terms of the GNU General Public License
|
| 8 |
+
as published by the Free Software Foundation; either version 2
|
| 9 |
+
of the License, or (at your option) any later version.
|
| 10 |
+
|
| 11 |
+
This program is distributed in the hope that it will be useful,
|
| 12 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 13 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 14 |
+
GNU General Public License for more details.
|
| 15 |
+
|
| 16 |
+
You should have received a copy of the GNU General Public License
|
| 17 |
+
along with this program; if not, write to the Free Software
|
| 18 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 19 |
+
USA.
|
| 20 |
+
|
| 21 |
+
*/
|
| 22 |
+
/* --------------------------------------------------------------------------*
|
| 23 |
+
* *
|
| 24 |
+
* Module :ATables *
|
| 25 |
+
* *
|
| 26 |
+
* Prototypes File: ATables.h *
|
| 27 |
+
* *
|
| 28 |
+
* Objective: Defines classes and methods for handling I/O for distortion & *
|
| 29 |
+
* alignment tables. *
|
| 30 |
+
*****************************************************************************/
|
| 31 |
+
|
| 32 |
+
#ifndef _atables_h
|
| 33 |
+
#define _atables_h 1
|
| 34 |
+
|
| 35 |
+
#include "defs.h"
|
| 36 |
+
#include <cassert>
|
| 37 |
+
#include <iostream>
|
| 38 |
+
#include <algorithm>
|
| 39 |
+
#include <functional>
|
| 40 |
+
#include <map>
|
| 41 |
+
#include <set>
|
| 42 |
+
#include "Vector.h"
|
| 43 |
+
#include <utility>
|
| 44 |
+
#include <fstream>
|
| 45 |
+
#include "Array4.h"
|
| 46 |
+
#include "myassert.h"
|
| 47 |
+
#include "Globals.h"
|
| 48 |
+
|
| 49 |
+
extern bool CompactADTable;
|
| 50 |
+
extern float amodel_smooth_factor;
|
| 51 |
+
extern short NoEmptyWord;
|
| 52 |
+
|
| 53 |
+
/* ------------------- Class Defintions of amodel ---------------------------*/
|
| 54 |
+
/* Class Name: amodel:
|
| 55 |
+
Objective: This defines the underlying data structure for distortion prob.
|
| 56 |
+
and count tables. They are defined as a hash table. Each entry in the hash
|
| 57 |
+
table is the probability (d(j/l,m,i), where j is word target position, i is
|
| 58 |
+
source word position connected to it, m is target sentence length, and l is
|
| 59 |
+
source sentence length) or count collected for it. The probability and the
|
| 60 |
+
count are represented as log integer probability as
|
| 61 |
+
defined by the class LogProb .
|
| 62 |
+
|
| 63 |
+
This class is used to represent a (probability) and d (distortion)
|
| 64 |
+
tables and also their corresponding count tables .
|
| 65 |
+
|
| 66 |
+
*--------------------------------------------------------------------------*/
|
| 67 |
+
|
| 68 |
+
// Returns the absolute value of `a` (negates it when it is below zero).
inline int Mabs(int a)
{
  return (a < 0) ? -a : a;
}
|
| 75 |
+
|
| 76 |
+
template <class VALTYPE>
|
| 77 |
+
class amodel
|
| 78 |
+
{
|
| 79 |
+
public:
|
| 80 |
+
Array4<VALTYPE> a;
|
| 81 |
+
bool is_distortion ;
|
| 82 |
+
WordIndex MaxSentLength;
|
| 83 |
+
bool ignoreL, ignoreM;
|
| 84 |
+
VALTYPE get(WordIndex aj, WordIndex j, WordIndex l, WordIndex m)const
|
| 85 |
+
{
|
| 86 |
+
massert( (!is_distortion) || aj<=m );massert( (!is_distortion) || j<=l );massert( (!is_distortion) || aj!=0 );
|
| 87 |
+
massert( is_distortion || aj<=l );massert( is_distortion || j<=m );massert( (is_distortion) || j!=0 );
|
| 88 |
+
massert( l<MaxSentLength );massert( m<MaxSentLength );
|
| 89 |
+
return a.get(aj, j, (CompactADTable&&is_distortion)?MaxSentLength:(l+1),(CompactADTable&&!is_distortion)?MaxSentLength:(m+1));
|
| 90 |
+
}
|
| 91 |
+
static float smooth_factor;
|
| 92 |
+
amodel(bool flag)
|
| 93 |
+
: a(MAX_SENTENCE_LENGTH+1,0.0), is_distortion(flag), MaxSentLength(MAX_SENTENCE_LENGTH)
|
| 94 |
+
{};
|
| 95 |
+
VALTYPE&getRef(WordIndex aj, WordIndex j, WordIndex l, WordIndex m)
|
| 96 |
+
{
|
| 97 |
+
massert( (!is_distortion) || aj<=m );massert( (!is_distortion) || j<=l );
|
| 98 |
+
massert( is_distortion || aj<=l );massert( is_distortion || j<=m );massert( (is_distortion) || j!=0 );
|
| 99 |
+
massert( l<MaxSentLength );massert( m<MaxSentLength );
|
| 100 |
+
return a(aj, j, (CompactADTable&&is_distortion)?MaxSentLength:(l+1),(CompactADTable&&!is_distortion)?MaxSentLength:(m+1));
|
| 101 |
+
}
|
| 102 |
+
void setValue(WordIndex aj, WordIndex j, WordIndex l, WordIndex m, VALTYPE val)
|
| 103 |
+
{
|
| 104 |
+
getRef(aj, j, l, m)=val;
|
| 105 |
+
}
|
| 106 |
+
VALTYPE getValue(WordIndex aj, WordIndex j, WordIndex l, WordIndex m) const
|
| 107 |
+
{
|
| 108 |
+
if( is_distortion==0 )
|
| 109 |
+
return max(double(PROB_SMOOTH),amodel_smooth_factor/(l+1)+(1.0-amodel_smooth_factor)*get(aj, j, l, m));
|
| 110 |
+
else
|
| 111 |
+
return max(double(PROB_SMOOTH),amodel_smooth_factor/m+(1.0-amodel_smooth_factor)*get(aj, j, l, m));
|
| 112 |
+
}
|
| 113 |
+
void printTable(const char* filename)const ;
|
| 114 |
+
template<class COUNT>
|
| 115 |
+
void normalize(amodel<COUNT>& aTable)const
|
| 116 |
+
{
|
| 117 |
+
WordIndex i, j, l, m ;
|
| 118 |
+
COUNT total;
|
| 119 |
+
int nParam=0;
|
| 120 |
+
for(l=0;l<MaxSentLength;l++)
|
| 121 |
+
for(m=0;m<MaxSentLength;m++)
|
| 122 |
+
{
|
| 123 |
+
if( CompactADTable && l!=m )
|
| 124 |
+
continue;
|
| 125 |
+
unsigned int L=((CompactADTable&&is_distortion)?MaxSentLength:(l+1))-1;
|
| 126 |
+
unsigned int M=((CompactADTable&&!is_distortion)?MaxSentLength:(m+1))-1;
|
| 127 |
+
if( is_distortion==0 )
|
| 128 |
+
for(j=1;j<=M; j++)
|
| 129 |
+
{
|
| 130 |
+
total=0.0;
|
| 131 |
+
for(i=0;i<=L;i++)
|
| 132 |
+
{
|
| 133 |
+
total+=get(i, j, L, M);
|
| 134 |
+
}
|
| 135 |
+
if( total )
|
| 136 |
+
for(i=0;i<=L;i++)
|
| 137 |
+
{
|
| 138 |
+
nParam++;
|
| 139 |
+
aTable.getRef(i, j, L, M)=get(i, j, L, M)/total;
|
| 140 |
+
massert(aTable.getRef(i,j,L,M)<=1.0);
|
| 141 |
+
if( NoEmptyWord&&i==0 )
|
| 142 |
+
aTable.getRef(i,j,L,M)=0;
|
| 143 |
+
}
|
| 144 |
+
}
|
| 145 |
+
else
|
| 146 |
+
for(i=0;i<=L;i++)
|
| 147 |
+
{
|
| 148 |
+
total=0.0;
|
| 149 |
+
for(j=1;j<=M;j++)
|
| 150 |
+
total+=get(j, i, L, M);
|
| 151 |
+
if( total )
|
| 152 |
+
for(j=1;j<=M;j++)
|
| 153 |
+
{
|
| 154 |
+
aTable.getRef(j, i, L, M)=amodel_smooth_factor/M+(1.0-amodel_smooth_factor)*get(j, i, L, M)/total;
|
| 155 |
+
nParam++;
|
| 156 |
+
massert(aTable.getRef(j,i,L,M)<=1.0);
|
| 157 |
+
if( NoEmptyWord&&i==0 )
|
| 158 |
+
aTable.getRef(j,i,L,M)=0;
|
| 159 |
+
}
|
| 160 |
+
}
|
| 161 |
+
}
|
| 162 |
+
cout << "A/D table contains " << nParam << " parameters.\n";
|
| 163 |
+
}
|
| 164 |
+
|
| 165 |
+
void readTable(const char *filename);
|
| 166 |
+
void clear()
|
| 167 |
+
{a.clear();}
|
| 168 |
+
};
|
| 169 |
+
|
| 170 |
+
/* ------------------- End of amodel Class Definitions ----------------------*/
|
| 171 |
+
|
| 172 |
+
#endif
|
tools/giza-pp/GIZA++-v2/AlignTables.cpp
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
EGYPT Toolkit for Statistical Machine Translation
|
| 4 |
+
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
| 5 |
+
|
| 6 |
+
This program is free software; you can redistribute it and/or
|
| 7 |
+
modify it under the terms of the GNU General Public License
|
| 8 |
+
as published by the Free Software Foundation; either version 2
|
| 9 |
+
of the License, or (at your option) any later version.
|
| 10 |
+
|
| 11 |
+
This program is distributed in the hope that it will be useful,
|
| 12 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 13 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 14 |
+
GNU General Public License for more details.
|
| 15 |
+
|
| 16 |
+
You should have received a copy of the GNU General Public License
|
| 17 |
+
along with this program; if not, write to the Free Software
|
| 18 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 19 |
+
USA.
|
| 20 |
+
|
| 21 |
+
*/
|
| 22 |
+
#include "AlignTables.h"
|
| 23 |
+
|
| 24 |
+
// Adds alignment `aj` with value `val` to the hash.
// Returns false, leaving the hash untouched, when the alignment is already
// stored or when `val` is not greater than zero; returns true after a
// successful insertion.
bool alignmodel::insert(Vector<WordIndex>& aj, LogProb val)
{
  const bool alreadyStored = (a.find(aj) != a.end());
  if (alreadyStored || val <= 0)
    return false;

  a.insert(pair<const Vector<WordIndex>, LogProb>(aj, val));
  return true;
}
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
// Looks up the value stored for alignment `align`.
// Returns the stored LogProb, or a LogProb initialised from 0.0 when the
// alignment is not in the hash.
LogProb alignmodel::getValue(Vector<WordIndex>& align) const
{
  hash_map<Vector<WordIndex>, LogProb, hashmyalignment, equal_to_myalignment >::const_iterator hit = a.find(align);
  if (hit != a.end())
    return (*hit).second;

  const LogProb notFound = 0.0 ;
  return notFound;
}
|
tools/giza-pp/GIZA++-v2/AlignTables.h
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
EGYPT Toolkit for Statistical Machine Translation
|
| 4 |
+
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
| 5 |
+
|
| 6 |
+
This program is free software; you can redistribute it and/or
|
| 7 |
+
modify it under the terms of the GNU General Public License
|
| 8 |
+
as published by the Free Software Foundation; either version 2
|
| 9 |
+
of the License, or (at your option) any later version.
|
| 10 |
+
|
| 11 |
+
This program is distributed in the hope that it will be useful,
|
| 12 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 13 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 14 |
+
GNU General Public License for more details.
|
| 15 |
+
|
| 16 |
+
You should have received a copy of the GNU General Public License
|
| 17 |
+
along with this program; if not, write to the Free Software
|
| 18 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 19 |
+
USA.
|
| 20 |
+
|
| 21 |
+
*/
|
| 22 |
+
#ifndef _aligntables_h
|
| 23 |
+
#define _aligntables_h 1
|
| 24 |
+
|
| 25 |
+
#include "defs.h"
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
#include <cassert>
|
| 29 |
+
|
| 30 |
+
#include <iostream>
|
| 31 |
+
#include <algorithm>
|
| 32 |
+
#include <functional>
|
| 33 |
+
#include <map>
|
| 34 |
+
#include <set>
|
| 35 |
+
//#include <vector>
|
| 36 |
+
#include "Vector.h"
|
| 37 |
+
#include <utility>
|
| 38 |
+
#include <math.h>
|
| 39 |
+
#include <fstream>
|
| 40 |
+
#include "transpair_model1.h"
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
/* ----------------- Class Definitions for hashmyalignment -------------------
|
| 44 |
+
Objective: This class is used to define a hash mapping function to map
|
| 45 |
+
an alignment (defined as a vector of integers) into a hash key
|
| 46 |
+
----------------------------------------------------------------------------*/
|
| 47 |
+
|
| 48 |
+
// Hash functor for alignments (vectors of word positions).
//
// The hash is the sum over j = 1 .. size-1 of key[j] raised to the power
// (j % 6) + 1, reduced modulo 1000000. Entry 0 of the vector does not take
// part, matching equal_to_myalignment, which also ignores it.
//
// The power is computed with exact integer multiplication in size_t. The
// earlier form converted pow()'s double result to int, which is undefined once
// the value no longer fits in an int (e.g. 36^6); for every value that did fit
// the result is unchanged. Unsigned wrap-around on overflow is well defined.
class hashmyalignment : public unary_function< Vector<WordIndex>, size_t >
{
public:
  size_t operator() (const Vector<WordIndex>& key) const
  {
    size_t key_sum = 0 ;
    for (WordIndex j = 1 ; j < key.size() ; j++)
      {
        const size_t base = key[j];
        const unsigned int exponent = (j % 6) + 1;   // cycles through 1..6
        size_t term = 1;
        for (unsigned int e = 0 ; e < exponent ; e++)
          term *= base;
        key_sum += term;
      }
    return key_sum % 1000000 ;
  }
};
|
| 69 |
+
|
| 70 |
+
class equal_to_myalignment{
|
| 71 |
+
// returns true if two alignments are the same (two vectors have same enties)
|
| 72 |
+
public:
|
| 73 |
+
bool operator()(const Vector<WordIndex> t1,
|
| 74 |
+
const Vector<WordIndex> t2) const
|
| 75 |
+
{WordIndex j ;
|
| 76 |
+
if (t1.size() != t2.size())
|
| 77 |
+
return(false);
|
| 78 |
+
for (j = 1 ; j < t1.size() ; j++)
|
| 79 |
+
if (t1[j] != t2[j])
|
| 80 |
+
return(false);
|
| 81 |
+
return(true);
|
| 82 |
+
}
|
| 83 |
+
|
| 84 |
+
};
|
| 85 |
+
|
| 86 |
+
/* ---------------- End of Class Definition for hashmyalignment -------------*/
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
/* ------------------ Class Defintions for alignmodel -----------------------
|
| 90 |
+
Class Name: alignmodel
|
| 91 |
+
Objective: Alignment neighborhoods (collections of alignments) are stored in
|
| 92 |
+
a hash table (for easy lookup). Each alignment vector is mapped into a hash
|
| 93 |
+
key using the operator defined above.
|
| 94 |
+
*--------------------------------------------------------------------------*/
|
| 95 |
+
|
| 96 |
+
class alignmodel{
|
| 97 |
+
private:
|
| 98 |
+
hash_map<Vector<WordIndex>, LogProb, hashmyalignment, equal_to_myalignment > a;
|
| 99 |
+
private:
|
| 100 |
+
// void erase(Vector<WordIndex>&);
|
| 101 |
+
public:
|
| 102 |
+
|
| 103 |
+
// methods;
|
| 104 |
+
|
| 105 |
+
inline hash_map<Vector<WordIndex>, LogProb, hashmyalignment, equal_to_myalignment >::iterator begin(void){return a.begin();} // begining of hash
|
| 106 |
+
inline hash_map<Vector<WordIndex>, LogProb, hashmyalignment, equal_to_myalignment >::iterator end(void){return a.end();} // end of hash
|
| 107 |
+
inline const hash_map<Vector<WordIndex>, LogProb, hashmyalignment, equal_to_myalignment >& getHash() const {return a;}; // reference to hash table
|
| 108 |
+
bool insert(Vector<WordIndex>&, LogProb val=0.0); // add a alignmnet
|
| 109 |
+
// void setValue(Vector<WordIndex>&, LogProb val); // not needed
|
| 110 |
+
LogProb getValue(Vector<WordIndex>&)const; // retrieve prob. of alignment
|
| 111 |
+
inline void clear(void){ a.clear();}; // clear hash table
|
| 112 |
+
// void printTable(const char* filename);
|
| 113 |
+
//inline void resize(WordIndex n) {a.resize(n);}; // resize table
|
| 114 |
+
|
| 115 |
+
};
|
| 116 |
+
|
| 117 |
+
/* -------------- End of alignmode Class Definitions ------------------------*/
|
| 118 |
+
#endif
|
tools/giza-pp/GIZA++-v2/Array.h
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#ifndef GIZA_ARRAY_H_DEFINED
|
| 2 |
+
#define GIZA_ARRAY_H_DEFINED
|
| 3 |
+
#include "Vector.h"
|
| 4 |
+
#define Array Vector
|
| 5 |
+
#endif
|
tools/giza-pp/GIZA++-v2/Array2.h
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
EGYPT Toolkit for Statistical Machine Translation
|
| 4 |
+
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
| 5 |
+
|
| 6 |
+
This program is free software; you can redistribute it and/or
|
| 7 |
+
modify it under the terms of the GNU General Public License
|
| 8 |
+
as published by the Free Software Foundation; either version 2
|
| 9 |
+
of the License, or (at your option) any later version.
|
| 10 |
+
|
| 11 |
+
This program is distributed in the hope that it will be useful,
|
| 12 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 13 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 14 |
+
GNU General Public License for more details.
|
| 15 |
+
|
| 16 |
+
You should have received a copy of the GNU General Public License
|
| 17 |
+
along with this program; if not, write to the Free Software
|
| 18 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 19 |
+
USA.
|
| 20 |
+
|
| 21 |
+
*/
|
| 22 |
+
/*--
|
| 23 |
+
Array2: Implementation of a two-dimensional checked array allowing for
|
| 24 |
+
a specified underlying one-dimensional data-structure.
|
| 25 |
+
|
| 26 |
+
Franz Josef Och (30/07/99)
|
| 27 |
+
--*/
|
| 28 |
+
#ifndef CLASS_Array2_DEFINED
|
| 29 |
+
#define CLASS_Array2_DEFINED
|
| 30 |
+
|
| 31 |
+
#include "mystl.h"
|
| 32 |
+
#include <string>
|
| 33 |
+
#include <vector>
|
| 34 |
+
|
| 35 |
+
// Two-dimensional array of h1 x h2 elements stored row-major in a
// one-dimensional container Y (std::vector<T> by default): element (i, j)
// lives at p[i*h2+j]. Element accessors check both indices with assert().
template<class T, class Y=std::vector<T> > class Array2
{
 private:
  Y p;
  unsigned int h1, h2;
 public:
  // h1 x h2 array of value-initialised elements.
  Array2(unsigned int _h1, unsigned int _h2)
    : p(_h1*_h2), h1(_h1), h2(_h2) {}
  // h1 x h2 array with every element set to _init.
  Array2(unsigned int _h1, unsigned int _h2, const T&_init)
    : p(_h1*_h2, _init), h1(_h1), h2(_h2) {}
  // Empty 0 x 0 array.
  Array2()
    : h1(0), h2(0) {}

  // Checked element access (read/write and read-only).
  inline T &operator()(unsigned int i, unsigned int j)
    { assert(i<h1);assert(j<h2);return p[i*h2+j]; }
  inline const T&operator()(unsigned int i, unsigned int j) const
    { assert(i<h1);assert(j<h2);return p[i*h2+j]; }

  // Checked element access returning a copy.
  inline T get(unsigned int i, unsigned int j)
    { assert(i<h1);assert(j<h2);return p[i*h2+j]; }
  inline const T get(unsigned int i, unsigned int j) const
    { assert(i<h1);assert(j<h2);return p[i*h2+j]; }

  // Checked element assignment.
  inline void set(unsigned int i, unsigned int j, T x)
    { assert(i<h1);assert(j<h2);p[i*h2+j]=x; }

  inline unsigned int getLen1() const
    { return h1; }
  inline unsigned int getLen2() const
    { return h2; }

  // Pointer range over the flat storage; both ends are null when either
  // dimension is zero.
  inline T*begin(){
    if( h1==0||h2==0)return 0;
    return &(p[0]);
  }
  inline T*end(){
    if( h1==0||h2==0)return 0;
    return &(p[0])+p.size();
  }

  // Const overloads mirror the non-const ones. Returning the container's
  // iterators directly does not convert to const T* for std::vector.
  inline const T*begin()const{
    if( h1==0||h2==0)return 0;
    return &(p[0]);
  }
  inline const T*end()const{
    if( h1==0||h2==0)return 0;
    return &(p[0])+p.size();
  }

  // Prints one row per line, each element followed by a blank, then a final
  // end-of-line with flush.
  friend std::ostream&operator<<(std::ostream&out, const Array2<T, Y>&ar)
  {
    for(unsigned int i=0;i<ar.getLen1();i++)
      {
        for(unsigned int j=0;j<ar.getLen2();j++)
          out << ar(i, j) << ' ';
        out << '\n';
      }
    return out << std::endl;
  }

  // Changes the dimensions to a x b. No-op when the dimensions are already
  // a x b. The flat storage is resized as a whole, so existing elements keep
  // their flat position, not their (i, j) position.
  inline void resize(unsigned int a,unsigned int b)
  {
    if( !(a==h1&&b==h2))
      {
        h1=a;
        h2=b;
        p.resize(h1*h2);
      }
  }

  // Changes the dimensions to a x b and sets EVERY element to t. No-op
  // (contents untouched) when the dimensions are already a x b.
  inline void resize(unsigned int a,unsigned int b,const T&t)
  {
    if( !(a==h1&&b==h2))
      {
        h1=a;
        h2=b;
        p.resize(h1*h2);
        std::fill(p.begin(),p.end(),t);
      }
  }
};
|
| 106 |
+
|
| 107 |
+
#endif
|
tools/giza-pp/GIZA++-v2/Array4.h
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
Copyright (C) 2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
| 4 |
+
|
| 5 |
+
This file is part of GIZA++ ( extension of GIZA ).
|
| 6 |
+
|
| 7 |
+
This program is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU General Public License
|
| 9 |
+
as published by the Free Software Foundation; either version 2
|
| 10 |
+
of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This program is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 15 |
+
GNU General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU General Public License
|
| 18 |
+
along with this program; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 20 |
+
USA.
|
| 21 |
+
|
| 22 |
+
*/
|
| 23 |
+
#ifndef AlignmentArray4_h_DEFINED
|
| 24 |
+
#define AlignmentArray4_h_DEFINED
|
| 25 |
+
|
| 26 |
+
#include "Array2.h"
|
| 27 |
+
template<class T> class Array4
|
| 28 |
+
{
|
| 29 |
+
private:
|
| 30 |
+
Array2< Array2<T>* > A;
|
| 31 |
+
int M;
|
| 32 |
+
T init;
|
| 33 |
+
public:
|
| 34 |
+
Array4(int m,const T&_init)
|
| 35 |
+
: A(m,m,0),M(m),init(_init) {}
|
| 36 |
+
~Array4()
|
| 37 |
+
{
|
| 38 |
+
for(int l=0;l<M;++l)
|
| 39 |
+
for(int m=0;m<M;++m)
|
| 40 |
+
delete A(l,m);
|
| 41 |
+
}
|
| 42 |
+
const T&operator()(int i, int j, int l, int m)const
|
| 43 |
+
{
|
| 44 |
+
if( A(l,m)==0 )
|
| 45 |
+
return init;
|
| 46 |
+
else
|
| 47 |
+
return (*A(l,m))(i,j);
|
| 48 |
+
}
|
| 49 |
+
const T&get(int i, int j, int l, int m)const
|
| 50 |
+
{
|
| 51 |
+
if( A(l,m)==0 )
|
| 52 |
+
return init;
|
| 53 |
+
else
|
| 54 |
+
return (*A(l,m))(i,j);
|
| 55 |
+
}
|
| 56 |
+
T&operator()(int i, int j, int l, int m)
|
| 57 |
+
{
|
| 58 |
+
if( A(l,m)==0 )
|
| 59 |
+
{
|
| 60 |
+
A(l,m)=new Array2<T>(max(l+1,m+1),max(l+1,m+1),init);
|
| 61 |
+
}
|
| 62 |
+
return (*A(l,m))(i,j);
|
| 63 |
+
}
|
| 64 |
+
void clear()
|
| 65 |
+
{
|
| 66 |
+
for(int l=0;l<M;++l)
|
| 67 |
+
for(int m=0;m<M;++m)
|
| 68 |
+
if( A(l,m) )
|
| 69 |
+
{
|
| 70 |
+
Array2<T>&a=*A(l,m);
|
| 71 |
+
for(int i=0;i<=l;++i)
|
| 72 |
+
for(int j=0;j<=m;++j)
|
| 73 |
+
a(i,j)=0.0;
|
| 74 |
+
}
|
| 75 |
+
}
|
| 76 |
+
};
|
| 77 |
+
|
| 78 |
+
#endif
|
tools/giza-pp/GIZA++-v2/D4Tables.h
ADDED
|
@@ -0,0 +1,460 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
| 4 |
+
|
| 5 |
+
This file is part of GIZA++ ( extension of GIZA ).
|
| 6 |
+
|
| 7 |
+
This program is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU General Public License
|
| 9 |
+
as published by the Free Software Foundation; either version 2
|
| 10 |
+
of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This program is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 15 |
+
GNU General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU General Public License
|
| 18 |
+
along with this program; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 20 |
+
USA.
|
| 21 |
+
|
| 22 |
+
*/
|
| 23 |
+
#ifndef _d4tables_h_define
|
| 24 |
+
#define _d4tables_h_define
|
| 25 |
+
#include <math.h>
|
| 26 |
+
#include "WordClasses.h"
|
| 27 |
+
#include "Globals.h"
|
| 28 |
+
#include "myassert.h"
|
| 29 |
+
|
| 30 |
+
extern float d4modelsmooth_factor;
|
| 31 |
+
|
| 32 |
+
class m4_key
|
| 33 |
+
{
|
| 34 |
+
public:
|
| 35 |
+
int deps;
|
| 36 |
+
int l;
|
| 37 |
+
int m;
|
| 38 |
+
int F;
|
| 39 |
+
int E;
|
| 40 |
+
int prevj;
|
| 41 |
+
int vacancies1,vacancies2;
|
| 42 |
+
m4_key(int _deps,int _l,int _m,int _F,int _E,int _prevj,int _v1,int _v2)
|
| 43 |
+
: deps(_deps),l(_l),m(_m),F(_F),E(_E),prevj(_prevj),vacancies1(_v1),vacancies2(_v2) {}
|
| 44 |
+
friend ostream&print1(ostream&out,const m4_key&x,const WordClasses&wce,const WordClasses&wcf)
|
| 45 |
+
{
|
| 46 |
+
if(x.deps&DEP_MODEL_l)out << "l: " << x.l<<' ';
|
| 47 |
+
if(x.deps&DEP_MODEL_m)out << "m: " << x.m<<' ';
|
| 48 |
+
if(x.deps&DEP_MODEL_F)out << "F: " << wcf.classString(x.F)<< ' ';
|
| 49 |
+
if(x.deps&DEP_MODEL_E)out << "E: " << wce.classString(x.E)<< ' ';
|
| 50 |
+
// if(x.deps&DEP_MODEL_pj)out << "j-1: " << x.prevj<<' ';
|
| 51 |
+
if(x.vacancies1!=-1)out << "v1: " << x.vacancies1 << ' ';
|
| 52 |
+
if(x.vacancies2!=-1)out << "v2: " << x.vacancies2 << ' ';
|
| 53 |
+
return out << '\n';
|
| 54 |
+
}
|
| 55 |
+
friend ostream&print1_m5(ostream&out,const m4_key&x,const WordClasses&wce,const WordClasses&wcf)
|
| 56 |
+
{
|
| 57 |
+
out << ((x.deps&DEP_MODEL_E)?wce.classString(x.E):string("0"))<< ' ';
|
| 58 |
+
out << ((x.deps&DEP_MODEL_F)?wcf.classString(x.F):string("0"))<< ' ';
|
| 59 |
+
out << x.vacancies1 << ' ';
|
| 60 |
+
out << x.vacancies2 << ' ';
|
| 61 |
+
return out;
|
| 62 |
+
}
|
| 63 |
+
friend ostream&printb1(ostream&out,const m4_key&x,const WordClasses&wce,const WordClasses&wcf)
|
| 64 |
+
{
|
| 65 |
+
if(x.deps&DEP_MODELb_l)out << "l: " << x.l<<' ';
|
| 66 |
+
if(x.deps&DEP_MODELb_m)out << "m: " << x.m<<' ';
|
| 67 |
+
if(x.deps&DEP_MODELb_F)out << "F: " << wcf.classString(x.F) << ' ';
|
| 68 |
+
if(x.deps&DEP_MODELb_E)out << "E: " << wce.classString(x.E) << ' ';
|
| 69 |
+
if(x.vacancies1!=-1)out << "v1: " << x.vacancies1 << ' ';
|
| 70 |
+
if(x.vacancies2!=-1)out << "v2: " << x.vacancies2 << ' ';
|
| 71 |
+
return out << '\n';
|
| 72 |
+
}
|
| 73 |
+
friend ostream&printb1_m5(ostream&out,const m4_key&x,const WordClasses&wcf)
|
| 74 |
+
{
|
| 75 |
+
out << "-1 " << ((x.deps&DEP_MODEL_F)?wcf.classString(x.F):string("0"))<< ' ';
|
| 76 |
+
out << x.vacancies1 << ' ';
|
| 77 |
+
out << x.vacancies2 << ' ';
|
| 78 |
+
return out;
|
| 79 |
+
}
|
| 80 |
+
};
|
| 81 |
+
|
| 82 |
+
class compare1
|
| 83 |
+
{
|
| 84 |
+
private:
|
| 85 |
+
int deps;
|
| 86 |
+
public:
|
| 87 |
+
compare1(int _deps) : deps(_deps) {}
|
| 88 |
+
bool operator()(const m4_key&a,const m4_key&b)const
|
| 89 |
+
{
|
| 90 |
+
if(deps&DEP_MODEL_l){if( a.l<b.l )return 1;if( b.l<a.l )return 0;}
|
| 91 |
+
if(deps&DEP_MODEL_m){if( a.m<b.m )return 1;if( b.m<a.m )return 0;}
|
| 92 |
+
if(deps&DEP_MODEL_F){if( a.F<b.F )return 1;if( b.F<a.F )return 0;}
|
| 93 |
+
if(deps&DEP_MODEL_E){if( a.E<b.E )return 1;if( b.E<a.E )return 0;}
|
| 94 |
+
//if(deps&DEP_MODEL_pj){if( a.prevj<b.prevj )return 1;if( b.prevj<a.prevj )return 0;}
|
| 95 |
+
if(a.vacancies1<b.vacancies1)return 1;if(b.vacancies1<a.vacancies1)return 0;
|
| 96 |
+
if(a.vacancies2<b.vacancies2)return 1;if(b.vacancies2<a.vacancies2)return 0;
|
| 97 |
+
return 0;
|
| 98 |
+
}
|
| 99 |
+
};
|
| 100 |
+
|
| 101 |
+
class compareb1
|
| 102 |
+
{
|
| 103 |
+
private:
|
| 104 |
+
int deps;
|
| 105 |
+
public:
|
| 106 |
+
compareb1(int _deps) : deps(_deps) {}
|
| 107 |
+
bool operator()(const m4_key&a,const m4_key&b)const
|
| 108 |
+
{
|
| 109 |
+
if(deps&DEP_MODELb_l){if( a.l<b.l )return 1;if( b.l<a.l )return 0;}
|
| 110 |
+
if(deps&DEP_MODELb_m){if( a.m<b.m )return 1;if( b.m<a.m )return 0;}
|
| 111 |
+
if(deps&DEP_MODELb_F){if( a.F<b.F )return 1;if( b.F<a.F )return 0;}
|
| 112 |
+
if(deps&DEP_MODELb_E){if( a.E<b.E )return 1;if( b.E<a.E )return 0;}
|
| 113 |
+
//if(deps&DEP_MODELb_pj){if( a.prevJ<b.prevJ )return 1;if( b.prevJ<a.prevJ )return 0;}
|
| 114 |
+
if(a.vacancies1<b.vacancies1)return 1;if(b.vacancies1<a.vacancies1)return 0;
|
| 115 |
+
if(a.vacancies2<b.vacancies2)return 1;if(b.vacancies2<a.vacancies2)return 0;
|
| 116 |
+
return 0;
|
| 117 |
+
}
|
| 118 |
+
};
|
| 119 |
+
|
| 120 |
+
inline void tokenize(const string&in,Vector<string>&out)
|
| 121 |
+
{
|
| 122 |
+
string s;
|
| 123 |
+
istringstream l(in);
|
| 124 |
+
while(l>>s)
|
| 125 |
+
out.push_back(s);
|
| 126 |
+
}
|
| 127 |
+
|
| 128 |
+
class d4model
|
| 129 |
+
{
|
| 130 |
+
public:
|
| 131 |
+
typedef Vector<pair<COUNT,PROB> > Vpff;
|
| 132 |
+
map<m4_key,Vpff,compare1 > D1;
|
| 133 |
+
map<m4_key,Vpff,compareb1> Db1;
|
| 134 |
+
PositionIndex msl;
|
| 135 |
+
WordClasses ewordclasses;
|
| 136 |
+
WordClasses fwordclasses;
|
| 137 |
+
template<class MAPPER>
|
| 138 |
+
void makeWordClasses(const MAPPER&m1,const MAPPER&m2,string efile,string ffile)
|
| 139 |
+
{
|
| 140 |
+
ifstream estrm(efile.c_str()),fstrm(ffile.c_str());
|
| 141 |
+
if( !estrm )
|
| 142 |
+
{
|
| 143 |
+
cerr << "ERROR: can not read " << efile << endl;
|
| 144 |
+
}
|
| 145 |
+
else
|
| 146 |
+
ewordclasses.read(estrm,m1);
|
| 147 |
+
if( !fstrm )
|
| 148 |
+
cerr << "ERROR: can not read " << ffile << endl;
|
| 149 |
+
else
|
| 150 |
+
fwordclasses.read(fstrm,m2);
|
| 151 |
+
}
|
| 152 |
+
d4model(PositionIndex _msl)
|
| 153 |
+
: D1(compare1(M4_Dependencies)),Db1(compareb1(M4_Dependencies)),msl(_msl)
|
| 154 |
+
{}
|
| 155 |
+
COUNT&getCountRef_first(WordIndex j,WordIndex j_cp,int E,int F,int l,int m)
|
| 156 |
+
{
|
| 157 |
+
assert(j>=1);
|
| 158 |
+
m4_key key(M4_Dependencies,l,m,F,E,j_cp,-1,-1);
|
| 159 |
+
map<m4_key,Vpff,compare1 >::iterator p=D1.find(key);
|
| 160 |
+
if(p==D1.end())p=D1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
|
| 161 |
+
assert(p!=D1.end());
|
| 162 |
+
return (p->second)[j-j_cp+msl].first;
|
| 163 |
+
}
|
| 164 |
+
COUNT&getCountRef_bigger(WordIndex j,WordIndex j_prev,int E,int F,int l,int m)
|
| 165 |
+
{
|
| 166 |
+
assert(j>=1);
|
| 167 |
+
assert(j_prev>=1);
|
| 168 |
+
m4_key key(M4_Dependencies,l,m,F,E,j_prev,-1,-1);
|
| 169 |
+
map<m4_key,Vpff,compareb1 >::iterator p=Db1.find(key);
|
| 170 |
+
if(p==Db1.end())p=Db1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
|
| 171 |
+
assert(p!=Db1.end());
|
| 172 |
+
return (p->second)[j-j_prev+msl].first;
|
| 173 |
+
}
|
| 174 |
+
map<m4_key,Vpff,compare1 >::const_iterator getProb_first_iterator(int E,int F,int l,int m)const
|
| 175 |
+
{return D1.find(m4_key(M4_Dependencies,l,m,F,E,0,-1,-1));}
|
| 176 |
+
PROB getProb_first_withiterator(WordIndex j,WordIndex j_cp,int m,const map<m4_key,Vpff,compare1 >::const_iterator& p)const
|
| 177 |
+
{
|
| 178 |
+
assert(j>=1);//assert(j_cp>=0);
|
| 179 |
+
assert(j<=msl);assert(j_cp<=msl);
|
| 180 |
+
if(p==D1.end())
|
| 181 |
+
{
|
| 182 |
+
return PROB_SMOOTH;
|
| 183 |
+
}
|
| 184 |
+
else
|
| 185 |
+
{
|
| 186 |
+
massert((p->second)[j-j_cp+msl].second<=1.0);
|
| 187 |
+
return max(PROB_SMOOTH,d4modelsmooth_factor/(2*m-1)+(1-d4modelsmooth_factor)*(p->second)[j-j_cp+msl].second);
|
| 188 |
+
}
|
| 189 |
+
}
|
| 190 |
+
PROB getProb_first(WordIndex j,WordIndex j_cp,int E,int F,int l,int m)const
|
| 191 |
+
{
|
| 192 |
+
assert(j>=1);//assert(j_cp>=0);
|
| 193 |
+
assert(j<=msl);assert(j_cp<=msl);
|
| 194 |
+
m4_key key(M4_Dependencies,l,m,F,E,j_cp,-1,-1);
|
| 195 |
+
map<m4_key,Vpff,compare1 >::const_iterator p=D1.find(key);
|
| 196 |
+
if(p==D1.end())
|
| 197 |
+
{
|
| 198 |
+
return PROB_SMOOTH;
|
| 199 |
+
}
|
| 200 |
+
else
|
| 201 |
+
{
|
| 202 |
+
massert((p->second)[j-j_cp+msl].second<=1.0);
|
| 203 |
+
return max(PROB_SMOOTH,d4modelsmooth_factor/(2*m-1)+(1-d4modelsmooth_factor)*(p->second)[j-j_cp+msl].second);
|
| 204 |
+
}
|
| 205 |
+
}
|
| 206 |
+
map<m4_key,Vpff,compareb1 >::const_iterator getProb_bigger_iterator(int E,int F,int l,int m)const
|
| 207 |
+
{
|
| 208 |
+
return Db1.find(m4_key(M4_Dependencies,l,m,F,E,0,-1,-1));
|
| 209 |
+
}
|
| 210 |
+
PROB getProb_bigger_withiterator(WordIndex j,WordIndex j_prev,int m,const map<m4_key,Vpff,compareb1 >::const_iterator&p)const
|
| 211 |
+
{
|
| 212 |
+
massert(j>=1);massert(j_prev>=1);
|
| 213 |
+
massert(j>j_prev);
|
| 214 |
+
massert(j<=msl);massert(j_prev<=msl);
|
| 215 |
+
if(p==Db1.end())
|
| 216 |
+
{
|
| 217 |
+
return PROB_SMOOTH;
|
| 218 |
+
}
|
| 219 |
+
else
|
| 220 |
+
{
|
| 221 |
+
massert((p->second)[j-j_prev+msl].second<=1.0 );
|
| 222 |
+
return max(PROB_SMOOTH,d4modelsmooth_factor/(m-1)+(1-d4modelsmooth_factor)*(p->second)[j-j_prev+msl].second);
|
| 223 |
+
}
|
| 224 |
+
}
|
| 225 |
+
|
| 226 |
+
PROB getProb_bigger(WordIndex j,WordIndex j_prev,int E,int F,int l,int m)const
|
| 227 |
+
{
|
| 228 |
+
massert(j>=1);massert(j_prev>=1);
|
| 229 |
+
massert(j>j_prev);
|
| 230 |
+
massert(j<=msl);massert(j_prev<=msl);
|
| 231 |
+
m4_key key(M4_Dependencies,l,m,F,E,j_prev,-1,-1);
|
| 232 |
+
map<m4_key,Vpff,compareb1 >::const_iterator p=Db1.find(key);
|
| 233 |
+
if(p==Db1.end())
|
| 234 |
+
{
|
| 235 |
+
return PROB_SMOOTH;
|
| 236 |
+
}
|
| 237 |
+
else
|
| 238 |
+
{
|
| 239 |
+
massert((p->second)[j-j_prev+msl].second<=1.0 );
|
| 240 |
+
return max(PROB_SMOOTH,d4modelsmooth_factor/(m-1)+(1-d4modelsmooth_factor)*(p->second)[j-j_prev+msl].second);
|
| 241 |
+
}
|
| 242 |
+
}
|
| 243 |
+
void normalizeTable()
|
| 244 |
+
{
|
| 245 |
+
int nParams=0;
|
| 246 |
+
for(map<m4_key,Vpff,compare1 >::iterator i=D1.begin();i!=D1.end();++i)
|
| 247 |
+
{
|
| 248 |
+
Vpff&d1=i->second;
|
| 249 |
+
double sum=0.0;
|
| 250 |
+
for(PositionIndex i=0;i<d1.size();i++)
|
| 251 |
+
sum+=d1[i].first;
|
| 252 |
+
for(PositionIndex i=0;i<d1.size();i++)
|
| 253 |
+
{
|
| 254 |
+
d1[i].second=sum?(d1[i].first/sum):(1.0/d1.size());
|
| 255 |
+
nParams++;
|
| 256 |
+
}
|
| 257 |
+
}
|
| 258 |
+
for(map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin();i!=Db1.end();++i)
|
| 259 |
+
{
|
| 260 |
+
Vpff&db1=i->second;
|
| 261 |
+
double sum=0.0;
|
| 262 |
+
for(PositionIndex i=0;i<db1.size();i++)
|
| 263 |
+
sum+=db1[i].first;
|
| 264 |
+
for(PositionIndex i=0;i<db1.size();i++)
|
| 265 |
+
{
|
| 266 |
+
db1[i].second=sum?(db1[i].first/sum):(1.0/db1.size());
|
| 267 |
+
nParams++;
|
| 268 |
+
}
|
| 269 |
+
}
|
| 270 |
+
cout << "D4 table contains " << nParams << " parameters.\n";
|
| 271 |
+
}
|
| 272 |
+
void clear()
|
| 273 |
+
{
|
| 274 |
+
for(map<m4_key,Vpff,compare1 >::iterator i=D1.begin();i!=D1.end();++i)
|
| 275 |
+
{
|
| 276 |
+
Vpff&d1=i->second;
|
| 277 |
+
for(PositionIndex i=0;i<d1.size();i++)
|
| 278 |
+
d1[i].first=0.0;
|
| 279 |
+
}
|
| 280 |
+
for(map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin();i!=Db1.end();++i)
|
| 281 |
+
{
|
| 282 |
+
Vpff&db1=i->second;
|
| 283 |
+
for(PositionIndex i=0;i<db1.size();i++)
|
| 284 |
+
db1[i].first=0.0;
|
| 285 |
+
}
|
| 286 |
+
}
|
| 287 |
+
|
| 288 |
+
void printProbTable(const char*fname1,const char*fname2)
|
| 289 |
+
{
|
| 290 |
+
ofstream out(fname1);
|
| 291 |
+
double ssum=0.0;
|
| 292 |
+
out << "# Translation tables for Model 4 .\n";
|
| 293 |
+
out << "# Table for head of cept.\n";
|
| 294 |
+
for(map<m4_key,Vpff,compare1 >::const_iterator i=D1.begin();i!=D1.end();++i)
|
| 295 |
+
{
|
| 296 |
+
const Vpff&d1=i->second;
|
| 297 |
+
double sum=0.0;
|
| 298 |
+
for(PositionIndex ii=0;ii<d1.size();ii++)sum+=d1[ii].first;
|
| 299 |
+
if ( sum )
|
| 300 |
+
{
|
| 301 |
+
print1(out,i->first,ewordclasses,fwordclasses);
|
| 302 |
+
out << "SUM: " << sum << ' '<< '\n';
|
| 303 |
+
for(unsigned ii=0;ii<d1.size();ii++)
|
| 304 |
+
if( d1[ii].first )
|
| 305 |
+
out << (int)(ii)-(int)(msl) << ' ' << d1[ii].first << '\n';
|
| 306 |
+
out << endl;
|
| 307 |
+
}
|
| 308 |
+
ssum+=sum;
|
| 309 |
+
}
|
| 310 |
+
out << "# Table for non-head of cept.\n";
|
| 311 |
+
for(map<m4_key,Vpff,compareb1 >::const_iterator i=Db1.begin();i!=Db1.end();++i)
|
| 312 |
+
{
|
| 313 |
+
const Vpff&db1=i->second;
|
| 314 |
+
double sum=0.0;
|
| 315 |
+
for(PositionIndex ii=0;ii<db1.size();++ii)sum+=db1[ii].first;
|
| 316 |
+
if( sum )
|
| 317 |
+
{
|
| 318 |
+
printb1(out,i->first,ewordclasses,fwordclasses);
|
| 319 |
+
out << "SUM: " << sum << ' '<<'\n';
|
| 320 |
+
for(unsigned ii=0;ii<db1.size();ii++)
|
| 321 |
+
if( db1[ii].first )
|
| 322 |
+
{
|
| 323 |
+
out << (int)(ii)-(int)(msl) << ' ' << db1[ii].first << '\n';
|
| 324 |
+
}
|
| 325 |
+
out << endl;
|
| 326 |
+
}
|
| 327 |
+
ssum+=sum;
|
| 328 |
+
}
|
| 329 |
+
out << endl << "FULL-SUM: " << ssum << endl;
|
| 330 |
+
if( M4_Dependencies==76 )
|
| 331 |
+
{
|
| 332 |
+
ofstream out2(fname2);
|
| 333 |
+
for(map<m4_key,Vpff,compare1 >::const_iterator i=D1.begin();i!=D1.end();++i)
|
| 334 |
+
{
|
| 335 |
+
const Vpff&d1=i->second;
|
| 336 |
+
for(unsigned ii=0;ii<d1.size();ii++)
|
| 337 |
+
if( d1[ii].first )
|
| 338 |
+
out2 << ewordclasses.classString(i->first.E) << ' ' << fwordclasses.classString(i->first.F) << ' ' << (int)(ii)-(int)(msl) << ' ' << d1[ii].second << '\n';
|
| 339 |
+
}
|
| 340 |
+
for(map<m4_key,Vpff,compareb1 >::const_iterator i=Db1.begin();i!=Db1.end();++i)
|
| 341 |
+
{
|
| 342 |
+
const Vpff&db1=i->second;
|
| 343 |
+
for(unsigned ii=0;ii<db1.size();ii++)
|
| 344 |
+
if( db1[ii].first )
|
| 345 |
+
out2 << -1 << ' ' << fwordclasses.classString(i->first.F) << ' ' << (int)(ii)-(int)(msl) << ' ' << db1[ii].second << '\n';
|
| 346 |
+
}
|
| 347 |
+
}
|
| 348 |
+
}
|
| 349 |
+
bool readProbTable(const char *fname)
|
| 350 |
+
{
|
| 351 |
+
cerr << "Reading D4Tables from " << fname << endl;
|
| 352 |
+
ifstream file(fname);
|
| 353 |
+
string line;
|
| 354 |
+
do
|
| 355 |
+
{
|
| 356 |
+
getline(file,line);
|
| 357 |
+
} while(line.length()&&line[0]=='#');
|
| 358 |
+
|
| 359 |
+
do
|
| 360 |
+
{
|
| 361 |
+
while(line.length()==0)
|
| 362 |
+
getline(file,line);
|
| 363 |
+
if( line[0]=='#')
|
| 364 |
+
break;
|
| 365 |
+
Vector<string> linestr;
|
| 366 |
+
tokenize(line,linestr);
|
| 367 |
+
m4_key k(M4_Dependencies,0,0,0,0,0,-1,-1);
|
| 368 |
+
for(unsigned int i=0;i<linestr.size();i+=2)
|
| 369 |
+
{
|
| 370 |
+
if( linestr[i]=="l:" ){k.l=atoi(linestr[i+1].c_str());iassert(M4_Dependencies&DEP_MODEL_l);}
|
| 371 |
+
if( linestr[i]=="m:" ){k.m=atoi(linestr[i+1].c_str());iassert(M4_Dependencies&DEP_MODEL_m);}
|
| 372 |
+
if( linestr[i]=="F:" ){k.F=fwordclasses(linestr[i+1]);iassert(M4_Dependencies&DEP_MODEL_F);}
|
| 373 |
+
if( linestr[i]=="E:" ){k.E=ewordclasses(linestr[i+1]);iassert(M4_Dependencies&DEP_MODEL_E);}
|
| 374 |
+
//if( linestr[i]=="j-1:" ){k.prevj=atoi(linestr[i+1].c_str());iassert(M4_Dependencies&DEP_MODEL_pj);}
|
| 375 |
+
}
|
| 376 |
+
string str;
|
| 377 |
+
double sum;
|
| 378 |
+
file >> str >> sum;
|
| 379 |
+
iassert(str=="SUM:");
|
| 380 |
+
if( str!="SUM:")
|
| 381 |
+
cerr << "ERROR: string is " << str << " and not sum " << endl;
|
| 382 |
+
|
| 383 |
+
do
|
| 384 |
+
{
|
| 385 |
+
int value;
|
| 386 |
+
double count;
|
| 387 |
+
getline(file,line);
|
| 388 |
+
istringstream twonumbers(line);
|
| 389 |
+
if(twonumbers >> value >> count)
|
| 390 |
+
{
|
| 391 |
+
if( D1.count(k)==0 )
|
| 392 |
+
D1.insert(make_pair(k,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0))));
|
| 393 |
+
D1[k][value+msl]=make_pair(count,count/sum);
|
| 394 |
+
}
|
| 395 |
+
}while(line.length());
|
| 396 |
+
}while(file);
|
| 397 |
+
do
|
| 398 |
+
{
|
| 399 |
+
getline(file,line);
|
| 400 |
+
} while(line.length()&&line[0]=='#');
|
| 401 |
+
do
|
| 402 |
+
{
|
| 403 |
+
while(line.length()==0)
|
| 404 |
+
getline(file,line);
|
| 405 |
+
if( line[0]=='#')
|
| 406 |
+
break;
|
| 407 |
+
Vector<string> linestr;
|
| 408 |
+
tokenize(line,linestr);
|
| 409 |
+
m4_key k(M4_Dependencies,0,0,0,0,0,-1,-1);
|
| 410 |
+
bool sumRead=0;
|
| 411 |
+
for(unsigned int i=0;i<linestr.size();i+=2)
|
| 412 |
+
{
|
| 413 |
+
if( linestr[i]=="l:" ){k.l=atoi(linestr[i+1].c_str());iassert(M4_Dependencies&DEP_MODELb_l);}
|
| 414 |
+
else if( linestr[i]=="m:" ){k.m=atoi(linestr[i+1].c_str());iassert(M4_Dependencies&DEP_MODELb_m);}
|
| 415 |
+
else if( linestr[i]=="F:" ){k.F=fwordclasses(linestr[i+1]);iassert(M4_Dependencies&DEP_MODELb_F);}
|
| 416 |
+
else if( linestr[i]=="E:" ){k.E=ewordclasses(linestr[i+1]);iassert(M4_Dependencies&DEP_MODELb_E);}
|
| 417 |
+
else if( linestr[i]=="SUM:" )
|
| 418 |
+
{
|
| 419 |
+
cerr << "Warning: obviously no dependency.\n";
|
| 420 |
+
sumRead=1;
|
| 421 |
+
}
|
| 422 |
+
else if( linestr[i]=="FULL-SUM:" )
|
| 423 |
+
{
|
| 424 |
+
break;
|
| 425 |
+
}
|
| 426 |
+
else
|
| 427 |
+
{
|
| 428 |
+
cerr << "ERROR: error in reading d4 tables: " << linestr[i] << ' ' << linestr[i+1] << endl;
|
| 429 |
+
}
|
| 430 |
+
}
|
| 431 |
+
string str;
|
| 432 |
+
double sum;
|
| 433 |
+
if( sumRead==0 )
|
| 434 |
+
file >> str >> sum;
|
| 435 |
+
else
|
| 436 |
+
{
|
| 437 |
+
str=linestr[0];
|
| 438 |
+
sum=atof(linestr[1].c_str());
|
| 439 |
+
}
|
| 440 |
+
if( str!="SUM:" )
|
| 441 |
+
cerr << "ERROR: should read SUM but read " << str << endl;
|
| 442 |
+
do
|
| 443 |
+
{
|
| 444 |
+
int value;
|
| 445 |
+
double count;
|
| 446 |
+
getline(file,line);
|
| 447 |
+
istringstream twonumbers(line);
|
| 448 |
+
if(twonumbers >> value >> count)
|
| 449 |
+
{
|
| 450 |
+
if( Db1.count(k)==0 )
|
| 451 |
+
Db1.insert(make_pair(k,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0))));
|
| 452 |
+
Db1[k][value+msl]=make_pair(count,count/sum);
|
| 453 |
+
}
|
| 454 |
+
}while(file&&line.length());
|
| 455 |
+
}while(file);
|
| 456 |
+
return 1;
|
| 457 |
+
}
|
| 458 |
+
};
|
| 459 |
+
|
| 460 |
+
#endif
|
tools/giza-pp/GIZA++-v2/D5Tables.h
ADDED
|
@@ -0,0 +1,235 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
Copyright (C) 2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
| 4 |
+
|
| 5 |
+
This file is part of GIZA++ ( extension of GIZA ).
|
| 6 |
+
|
| 7 |
+
This program is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU General Public License
|
| 9 |
+
as published by the Free Software Foundation; either version 2
|
| 10 |
+
of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This program is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 15 |
+
GNU General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU General Public License
|
| 18 |
+
along with this program; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 20 |
+
USA.
|
| 21 |
+
|
| 22 |
+
*/
|
| 23 |
+
#ifndef _d5tables_h_define
|
| 24 |
+
#define _d5tables_h_define
|
| 25 |
+
#include <math.h>
|
| 26 |
+
#include "D4Tables.h"
|
| 27 |
+
|
| 28 |
+
extern float d5modelsmooth_countoffset;
|
| 29 |
+
extern float d5modelsmooth_factor;
|
| 30 |
+
|
| 31 |
+
#define UNSEENPROB (1.0/vacancies_total)
|
| 32 |
+
|
| 33 |
+
class d5model
|
| 34 |
+
{
|
| 35 |
+
private:
|
| 36 |
+
typedef Vector < pair < COUNT,PROB > >Vpff;
|
| 37 |
+
map< m4_key,Vpff,compare1 > D1;
|
| 38 |
+
map< m4_key,Vpff,compareb1 > Db1;
|
| 39 |
+
public:
|
| 40 |
+
d4model&d4m;
|
| 41 |
+
WordClasses ewordclasses,fwordclasses;
|
| 42 |
+
template<class MAPPER>
|
| 43 |
+
void makeWordClasses(const MAPPER&m1,const MAPPER&m2,string efile,string ffile)
|
| 44 |
+
{
|
| 45 |
+
ifstream estrm(efile.c_str()),fstrm(ffile.c_str());
|
| 46 |
+
if( !estrm )
|
| 47 |
+
cerr << "ERROR: can not read classes from " << efile << endl;
|
| 48 |
+
else
|
| 49 |
+
ewordclasses.read(estrm,m1);
|
| 50 |
+
if( !fstrm )
|
| 51 |
+
cerr << "ERROR: can not read classes from " << ffile << endl;
|
| 52 |
+
else
|
| 53 |
+
fwordclasses.read(fstrm,m2);
|
| 54 |
+
}
|
| 55 |
+
d5model (d4model&_d4m)
|
| 56 |
+
:D1 (compare1(M5_Dependencies)), Db1 (compareb1(M5_Dependencies)),d4m(_d4m)
|
| 57 |
+
{}
|
| 58 |
+
COUNT &getCountRef_first (PositionIndex vacancies_j,
|
| 59 |
+
PositionIndex vacancies_jp, int F,
|
| 60 |
+
PositionIndex l, PositionIndex m,
|
| 61 |
+
PositionIndex vacancies_total)
|
| 62 |
+
{
|
| 63 |
+
massert(vacancies_j>0);
|
| 64 |
+
massert(vacancies_total>0);
|
| 65 |
+
//massert(vacancies_jp<=vacancies_total);
|
| 66 |
+
massert(vacancies_j <=vacancies_total);
|
| 67 |
+
massert(vacancies_total<=m);
|
| 68 |
+
m4_key key(M5_Dependencies,l,m,F,0,0,vacancies_jp,vacancies_total);
|
| 69 |
+
map<m4_key,Vpff,compare1 >::iterator p=D1.find(key);
|
| 70 |
+
if(p==D1.end())
|
| 71 |
+
p=D1.insert(make_pair(key,Vpff(vacancies_total+1,make_pair(0,UNSEENPROB)))).first; // !!! constrain length
|
| 72 |
+
massert(p!=D1.end());
|
| 73 |
+
return (p->second)[vacancies_j].first;
|
| 74 |
+
}
|
| 75 |
+
COUNT &getCountRef_bigger (PositionIndex vacancies_j,
|
| 76 |
+
PositionIndex vacancies_jp, int F,
|
| 77 |
+
PositionIndex l, PositionIndex m,
|
| 78 |
+
PositionIndex vacancies_total)
|
| 79 |
+
{
|
| 80 |
+
massert(vacancies_j>0);
|
| 81 |
+
massert(vacancies_total>0);
|
| 82 |
+
massert (vacancies_jp <= vacancies_j);
|
| 83 |
+
massert (vacancies_j-vacancies_jp <= vacancies_total);
|
| 84 |
+
m4_key key(M5_Dependencies,l,m,F,0,0,-1,vacancies_total);
|
| 85 |
+
map<m4_key,Vpff,compareb1 >::iterator p=Db1.find(key);
|
| 86 |
+
if(p==Db1.end())
|
| 87 |
+
p=Db1.insert(make_pair(key,Vpff(vacancies_total+1,make_pair(0,UNSEENPROB)))).first; // !!! constrain length
|
| 88 |
+
massert(p!=Db1.end());
|
| 89 |
+
return (p->second)[vacancies_j - vacancies_jp].first;
|
| 90 |
+
}
|
| 91 |
+
PROB getProb_first (PositionIndex vacancies_j, PositionIndex vacancies_jp,
|
| 92 |
+
int F, PositionIndex l, PositionIndex m,
|
| 93 |
+
PositionIndex vacancies_total) const
|
| 94 |
+
{
|
| 95 |
+
massert(vacancies_j>0);
|
| 96 |
+
massert(vacancies_total>0);
|
| 97 |
+
//massert(vacancies_jp<=vacancies_total);
|
| 98 |
+
massert(vacancies_j <=vacancies_total);
|
| 99 |
+
massert(vacancies_total<=m);
|
| 100 |
+
m4_key key(M5_Dependencies,l,m,F,0,0,vacancies_jp,vacancies_total);
|
| 101 |
+
map<m4_key,Vpff,compare1 >::const_iterator p=D1.find(key);
|
| 102 |
+
if( p==D1.end() )
|
| 103 |
+
return UNSEENPROB;
|
| 104 |
+
else
|
| 105 |
+
return max(PROB_SMOOTH,d5modelsmooth_factor/(vacancies_total)+(1-d5modelsmooth_factor)*(p->second)[vacancies_j].second);
|
| 106 |
+
}
|
| 107 |
+
PROB getProb_bigger (PositionIndex vacancies_j, PositionIndex vacancies_jp,
|
| 108 |
+
int F, PositionIndex l, PositionIndex m,
|
| 109 |
+
PositionIndex vacancies_total) const
|
| 110 |
+
{
|
| 111 |
+
massert(vacancies_j>0);
|
| 112 |
+
massert(vacancies_total>0);
|
| 113 |
+
massert (vacancies_jp <= vacancies_j);
|
| 114 |
+
massert (vacancies_j-vacancies_jp <= vacancies_total);
|
| 115 |
+
m4_key key(M5_Dependencies,l,m,F,0,0,-1,vacancies_total);
|
| 116 |
+
map<m4_key,Vpff,compareb1 >::const_iterator p=Db1.find(key);
|
| 117 |
+
if(p==Db1.end())
|
| 118 |
+
return UNSEENPROB;
|
| 119 |
+
else
|
| 120 |
+
return max(PROB_SMOOTH,d5modelsmooth_factor/(vacancies_total)+(1-d5modelsmooth_factor)*(p->second)[vacancies_j - vacancies_jp].second);
|
| 121 |
+
}
|
| 122 |
+
void normalizeTable ()
|
| 123 |
+
{
|
| 124 |
+
int nParams=0;
|
| 125 |
+
for(map<m4_key,Vpff,compare1 >::iterator i=D1.begin();i!=D1.end();++i)
|
| 126 |
+
{
|
| 127 |
+
Vpff&d1=i->second;
|
| 128 |
+
COUNT sum=0.0;
|
| 129 |
+
for(PositionIndex i=0;i<d1.size();i++)
|
| 130 |
+
sum+=d1[i].first+d5modelsmooth_countoffset;
|
| 131 |
+
for(PositionIndex i=0;i<d1.size();i++)
|
| 132 |
+
{
|
| 133 |
+
d1[i].second=sum?((d1[i].first+d5modelsmooth_countoffset)/sum):(1.0/d1.size());
|
| 134 |
+
nParams++;
|
| 135 |
+
}
|
| 136 |
+
}
|
| 137 |
+
for(map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin();i!=Db1.end();++i)
|
| 138 |
+
{
|
| 139 |
+
Vpff&db1=i->second;
|
| 140 |
+
double sum=0.0;
|
| 141 |
+
for(PositionIndex i=0;i<db1.size();i++)
|
| 142 |
+
sum+=db1[i].first+d5modelsmooth_countoffset;
|
| 143 |
+
for(PositionIndex i=0;i<db1.size();i++)
|
| 144 |
+
{
|
| 145 |
+
db1[i].second=sum?((db1[i].first+d5modelsmooth_countoffset)/sum):(1.0/db1.size());
|
| 146 |
+
nParams++;
|
| 147 |
+
}
|
| 148 |
+
}
|
| 149 |
+
cout << "D5 table contains " << nParams << " parameters.\n";
|
| 150 |
+
}
|
| 151 |
+
|
| 152 |
+
// Writes both Model 5 tables of d5m to out as text.
// A comment banner precedes each table.  For every key whose raw counts
// (.first) sum to a non-zero value, one line is printed per position:
// the key (via print1_m5 / printb1_m5), the position index, the
// probability (.second) and the count (.first).  Each such group is
// terminated by an empty line written with endl, which also flushes out.
// Keys whose counts sum to zero are skipped entirely.
friend ostream&operator<<(ostream&out,d5model&d5m)
{
  typedef map<m4_key,Vpff,compare1 >::const_iterator HeadIter;
  typedef map<m4_key,Vpff,compareb1 >::const_iterator NonHeadIter;

  out << "# Translation tables for Model 5 .\n";
  out << "# Table for head of cept.\n";
  for(HeadIter entry=d5m.D1.begin();entry!=d5m.D1.end();++entry)
    {
      const Vpff&row=entry->second;
      COUNT total=0.0;
      for(PositionIndex k=0;k<row.size();k++)
        total+=row[k].first;
      if( !total )
        continue;                       // nothing observed for this key
      for(unsigned k=0;k<row.size();k++)
        {
          print1_m5(out,entry->first,d5m.ewordclasses,d5m.fwordclasses);
          out << (int)(k) << ' ' << row[k].second << ' ' << row[k].first << '\n';
        }
      out << endl;
    }

  out << "# Table for non-head of cept.\n";
  for(NonHeadIter entry=d5m.Db1.begin();entry!=d5m.Db1.end();++entry)
    {
      const Vpff&row=entry->second;
      double total=0.0;
      for(PositionIndex k=0;k<row.size();++k)
        total+=row[k].first;
      if( !total )
        continue;                       // nothing observed for this key
      for(unsigned k=0;k<row.size();k++)
        {
          printb1_m5(out,entry->first,d5m.fwordclasses);
          out << (int)(k) << ' ' << row[k].second << ' ' << row[k].first << '\n';
        }
      out << endl;
    }
  return out;
}
|
| 189 |
+
void readProbTable(const char*x)
|
| 190 |
+
{
|
| 191 |
+
ifstream f(x);
|
| 192 |
+
string l;
|
| 193 |
+
while(getline(f,l))
|
| 194 |
+
{
|
| 195 |
+
if(l.length()&&l[0]=='#')
|
| 196 |
+
continue;
|
| 197 |
+
istringstream is(l.c_str());
|
| 198 |
+
string E,F;
|
| 199 |
+
int v1,v2,ii;
|
| 200 |
+
double prob,count;
|
| 201 |
+
if(is>>E>>F>>v1>>v2>>ii>>prob>>count)
|
| 202 |
+
{
|
| 203 |
+
//cerr << "Read: " << E << " " << F << " " << v1 << " " << v2 << " " << prob<< endl;
|
| 204 |
+
if( count>0 )
|
| 205 |
+
if( E=="-1")
|
| 206 |
+
getCountRef_bigger(ii,0,fwordclasses(F),1000,1000,v2)+=count;
|
| 207 |
+
else
|
| 208 |
+
getCountRef_first(ii,v1,fwordclasses(F),1000,1000,v2)+=count;
|
| 209 |
+
}
|
| 210 |
+
}
|
| 211 |
+
normalizeTable();
|
| 212 |
+
ofstream of("M5FILE");
|
| 213 |
+
of << (*this);
|
| 214 |
+
}
|
| 215 |
+
void clear()
|
| 216 |
+
{
|
| 217 |
+
for(map<m4_key,Vpff,compare1 >::iterator i=D1.begin();i!=D1.end();++i)
|
| 218 |
+
{
|
| 219 |
+
Vpff&d1=i->second;
|
| 220 |
+
for(PositionIndex i=0;i<d1.size();i++)
|
| 221 |
+
d1[i].first=0.0;
|
| 222 |
+
}
|
| 223 |
+
for(map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin();i!=Db1.end();++i)
|
| 224 |
+
{
|
| 225 |
+
Vpff&db1=i->second;
|
| 226 |
+
for(PositionIndex i=0;i<db1.size();i++)
|
| 227 |
+
db1[i].first=0.0;
|
| 228 |
+
}
|
| 229 |
+
}
|
| 230 |
+
};
|
| 231 |
+
|
| 232 |
+
#endif
|
| 233 |
+
|
| 234 |
+
|
| 235 |
+
|
tools/giza-pp/GIZA++-v2/Dictionary.cpp
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
EGYPT Toolkit for Statistical Machine Translation
|
| 4 |
+
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
| 5 |
+
|
| 6 |
+
This program is free software; you can redistribute it and/or
|
| 7 |
+
modify it under the terms of the GNU General Public License
|
| 8 |
+
as published by the Free Software Foundation; either version 2
|
| 9 |
+
of the License, or (at your option) any later version.
|
| 10 |
+
|
| 11 |
+
This program is distributed in the hope that it will be useful,
|
| 12 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 13 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 14 |
+
GNU General Public License for more details.
|
| 15 |
+
|
| 16 |
+
You should have received a copy of the GNU General Public License
|
| 17 |
+
along with this program; if not, write to the Free Software
|
| 18 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 19 |
+
USA.
|
| 20 |
+
|
| 21 |
+
*/
|
| 22 |
+
/* Noah A. Smith
|
| 23 |
+
Dictionary object for dictionary filter in Model 1 training
|
| 24 |
+
|
| 25 |
+
Dictionary file must be in order (sorted) by Foreign vocab id, but English
|
| 26 |
+
vocab ids may be in any order.
|
| 27 |
+
|
| 28 |
+
9 August 1999
|
| 29 |
+
*/
|
| 30 |
+
|
| 31 |
+
#include "Dictionary.h"
|
| 32 |
+
|
| 33 |
+
#include <cstring>
|
| 34 |
+
|
| 35 |
+
// Builds the dictionary filter from a text file of integer pairs.
//
// filename: path of the dictionary file.  An empty string (or a null
//           pointer) disables the dictionary: the object is marked "dead"
//           and indict() always answers false.
//
// Each "p q" pair read from the file is appended to pairs[0] / pairs[1].
// Reading stops at the first token that is not an integer.  If the file
// cannot be opened an error is printed and the process exits with status 1.
//
// All members are initialised on every path.  The lookup cache starts out
// describing id 0 (currval == 0), so its index range is primed with the
// rows whose first id is 0 (an empty range if there are none); the file is
// required to be sorted by first id, which makes those rows contiguous.
Dictionary::Dictionary(const char *filename)
  : currval(0), currindexmin(0), currindexmax(-1), dead(false)
{
  if(filename == 0 || !strcmp(filename, "")){
    dead = true;
    return;
  }
  cout << "Reading dictionary from: " << filename << '\n';
  ifstream dFile(filename);
  if(!dFile){
    cerr << "ERROR: Can't open dictionary: " << filename << '\n';
    exit(1);
  }

  int p, q;
  int row = 0;
  bool zeroSeen = false;
  while(dFile >> p >> q){
    pairs[0].push_back(p);
    pairs[1].push_back(q);
    if(p == 0){
      // Keep the initial cache (currval == 0) consistent with the data.
      if(!zeroSeen){
        currindexmin = row;
        zeroSeen = true;
      }
      currindexmax = row;
    }
    ++row;
  }
  cout << "Dictionary read; " << pairs[0].size() << " pairs loaded." << '\n';
  // dFile is closed by its destructor.
}
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
bool Dictionary::indict(int p, int q){
|
| 62 |
+
if(dead) return false;
|
| 63 |
+
if(p == 0 && q == 0) return false;
|
| 64 |
+
if(currval == p){
|
| 65 |
+
for(int i = currindexmin; i <= currindexmax; i++)
|
| 66 |
+
if(pairs[1][i] == q) return true;
|
| 67 |
+
return false;
|
| 68 |
+
}
|
| 69 |
+
else{
|
| 70 |
+
int begin = 0, end = pairs[0].size() - 1, middle = 0;
|
| 71 |
+
unsigned int t;
|
| 72 |
+
bool ret = false;
|
| 73 |
+
while(begin <= end){
|
| 74 |
+
middle = begin + ((end - begin) >> 1);
|
| 75 |
+
if(p < pairs[0][middle]) end = middle - 1;
|
| 76 |
+
else if(p > pairs[0][middle]) begin = middle + 1;
|
| 77 |
+
else{
|
| 78 |
+
break;
|
| 79 |
+
}
|
| 80 |
+
}
|
| 81 |
+
t = middle;
|
| 82 |
+
while(pairs[0][t] == p )
|
| 83 |
+
if(pairs[1][t--] == q) ret = true;
|
| 84 |
+
currindexmin = t + 1;
|
| 85 |
+
t = middle + 1;
|
| 86 |
+
while(pairs[0][t] == p && t < pairs[0].size())
|
| 87 |
+
if(pairs[1][t++] == q) ret = true;
|
| 88 |
+
currindexmax = t - 1;
|
| 89 |
+
currval = p;
|
| 90 |
+
return ret;
|
| 91 |
+
}
|
| 92 |
+
}
|
| 93 |
+
|
| 94 |
+
|
tools/giza-pp/GIZA++-v2/Dictionary.h
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
EGYPT Toolkit for Statistical Machine Translation
|
| 4 |
+
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
| 5 |
+
|
| 6 |
+
This program is free software; you can redistribute it and/or
|
| 7 |
+
modify it under the terms of the GNU General Public License
|
| 8 |
+
as published by the Free Software Foundation; either version 2
|
| 9 |
+
of the License, or (at your option) any later version.
|
| 10 |
+
|
| 11 |
+
This program is distributed in the hope that it will be useful,
|
| 12 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 13 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 14 |
+
GNU General Public License for more details.
|
| 15 |
+
|
| 16 |
+
You should have received a copy of the GNU General Public License
|
| 17 |
+
along with this program; if not, write to the Free Software
|
| 18 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 19 |
+
USA.
|
| 20 |
+
|
| 21 |
+
*/
|
| 22 |
+
/* Noah A. Smith
|
| 23 |
+
Dictionary object for dictionary filter in Model 1 training
|
| 24 |
+
|
| 25 |
+
9 August 1999
|
| 26 |
+
*/
|
| 27 |
+
|
| 28 |
+
#include <iostream>
|
| 29 |
+
#include <fstream>
|
| 30 |
+
|
| 31 |
+
#include "Vector.h"
|
| 32 |
+
|
| 33 |
+
#ifndef DICTIONARY_H
|
| 34 |
+
#define DICTIONARY_H
|
| 35 |
+
|
| 36 |
+
class Dictionary{
|
| 37 |
+
private:
|
| 38 |
+
Vector<int> pairs[2];
|
| 39 |
+
int currval;
|
| 40 |
+
int currindexmin;
|
| 41 |
+
int currindexmax;
|
| 42 |
+
bool dead;
|
| 43 |
+
public:
|
| 44 |
+
Dictionary(const char *);
|
| 45 |
+
bool indict(int, int);
|
| 46 |
+
};
|
| 47 |
+
|
| 48 |
+
#endif
|
tools/giza-pp/GIZA++-v2/FlexArray.h
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
Copyright (C) 1988,1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
| 4 |
+
|
| 5 |
+
This file is part of GIZA++ ( extension of GIZA ).
|
| 6 |
+
|
| 7 |
+
This program is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU General Public License
|
| 9 |
+
as published by the Free Software Foundation; either version 2
|
| 10 |
+
of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This program is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 15 |
+
GNU General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU General Public License
|
| 18 |
+
along with this program; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 20 |
+
USA.
|
| 21 |
+
|
| 22 |
+
*/
|
| 23 |
+
#ifndef CLASS_FlexArray_defined
|
| 24 |
+
#define CLASS_FlexArray_defined
|
| 25 |
+
#include "Array.h"
|
| 26 |
+
#include <iostream>
|
| 27 |
+
#include <fstream>
|
| 28 |
+
template<class T>
|
| 29 |
+
class FlexArray
|
| 30 |
+
{
|
| 31 |
+
private:
|
| 32 |
+
Array<T> p;
|
| 33 |
+
int start,End;
|
| 34 |
+
public:
|
| 35 |
+
FlexArray(int _start=0,int _end=-1)
|
| 36 |
+
: p(_end-_start+1),start(_start),End(_end) {}
|
| 37 |
+
FlexArray(int _start,int _end,const T&init)
|
| 38 |
+
: p(_end-_start+1,init),start(_start),End(_end) {}
|
| 39 |
+
T&operator[](int i)
|
| 40 |
+
{return p[i-start];}
|
| 41 |
+
const T&operator[](int i)const
|
| 42 |
+
{return p[i-start];}
|
| 43 |
+
int low()const{return start;}
|
| 44 |
+
int high()const{return End;}
|
| 45 |
+
T*begin(){return conv<double>(p.begin());}
|
| 46 |
+
T*end(){return conv<double>(p.end());}
|
| 47 |
+
};
|
| 48 |
+
|
| 49 |
+
template<class T>
|
| 50 |
+
inline ostream&operator<<(ostream&out,const FlexArray<T>&x)
|
| 51 |
+
{
|
| 52 |
+
for(int i=x.low();i<=x.high();++i)
|
| 53 |
+
out << i << ':' << x[i] << ';' << ' ';
|
| 54 |
+
return out;
|
| 55 |
+
}
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
#endif
|
tools/giza-pp/GIZA++-v2/ForwardBackward.cpp
ADDED
|
@@ -0,0 +1,242 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
| 4 |
+
|
| 5 |
+
This file is part of GIZA++ ( extension of GIZA ).
|
| 6 |
+
|
| 7 |
+
This program is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU General Public License
|
| 9 |
+
as published by the Free Software Foundation; either version 2
|
| 10 |
+
of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This program is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 15 |
+
GNU General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU General Public License
|
| 18 |
+
along with this program; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 20 |
+
USA.
|
| 21 |
+
|
| 22 |
+
*/
|
| 23 |
+
#ifndef NO_TRAINING
|
| 24 |
+
#include "ForwardBackward.h"
|
| 25 |
+
#include "Globals.h"
|
| 26 |
+
#include "myassert.h"
|
| 27 |
+
#include "HMMTables.h"
|
| 28 |
+
#include "mymath.h"
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
double ForwardBackwardTraining(const HMMNetwork&net,Array<double>&g,Array<Array2<double> >&E){
|
| 32 |
+
const int I=net.size1(),J=net.size2(),N=I*J;
|
| 33 |
+
Array<double> alpha(N,0),beta(N,0),sum(J);
|
| 34 |
+
for(int i=0;i<I;i++)
|
| 35 |
+
beta[N-I+i]=net.getBetainit(i);
|
| 36 |
+
double * cur_beta=conv<double>(beta.begin())+N-I-1;
|
| 37 |
+
for(int j=J-2;j>=0;--j)
|
| 38 |
+
for(int ti=I-1;ti>=0;--ti,--cur_beta) {
|
| 39 |
+
const double *next_beta=conv<double>(beta.begin())+(j+1)*I;
|
| 40 |
+
const double *alprob=&net.outProb(j,ti,0),*next_node=&net.nodeProb(0,j+1);
|
| 41 |
+
for(int ni=0;ni<I;++ni,(next_node+=J)){
|
| 42 |
+
massert(cur_beta<next_beta&& &net.outProb(j,ti,ni)==alprob);
|
| 43 |
+
massert(next_node == &net.nodeProb(ni,j+1));
|
| 44 |
+
/* if( VERB&&(*next_beta)*(*alprob)*(*next_node) )
|
| 45 |
+
cout << "B= " << (int)(cur_beta-beta.begin()) << " += " << (*next_beta) << "("
|
| 46 |
+
<< next_beta-beta.begin() << ") alprob:" << (*alprob) << " lexprob:" << (*next_node) << endl;*/
|
| 47 |
+
(*cur_beta)+=(*next_beta++)*(*alprob++)*(*next_node);
|
| 48 |
+
}
|
| 49 |
+
}
|
| 50 |
+
for(int i=0;i<I;i++)
|
| 51 |
+
alpha[i]=net.getAlphainit(i)*net.nodeProb(i,0);
|
| 52 |
+
double* cur_alpha=conv<double>(alpha.begin())+I;
|
| 53 |
+
cur_beta=conv<double>(beta.begin())+I;
|
| 54 |
+
for(int j=1;j<J;j++){
|
| 55 |
+
Array2<double>&e=E[ (E.size()==1)?0:(j-1) ];
|
| 56 |
+
if( (E.size()!=1) || j==1 )
|
| 57 |
+
{
|
| 58 |
+
e.resize(I,I);
|
| 59 |
+
fill(e.begin(),e.end(),0.0);
|
| 60 |
+
}
|
| 61 |
+
|
| 62 |
+
for(int ti=0;ti<I;++ti,++cur_alpha,++cur_beta) {
|
| 63 |
+
const double * prev_alpha=conv<double>(alpha.begin())+I*(j-1);
|
| 64 |
+
double *cur_e= &e(ti,0);
|
| 65 |
+
double this_node=net.nodeProb(ti,j);
|
| 66 |
+
const double* alprob= &net.outProb(j-1,0,ti);
|
| 67 |
+
for(int pi=0;pi<I;++pi,++prev_alpha,(alprob+=I)){
|
| 68 |
+
massert(prev_alpha<cur_alpha&& &net.outProb(j-1,pi,ti)==alprob);
|
| 69 |
+
massert(&e(ti,pi)==cur_e);
|
| 70 |
+
const double alpha_increment= *prev_alpha*(*alprob)*this_node;
|
| 71 |
+
(*cur_alpha)+=alpha_increment;
|
| 72 |
+
(*cur_e++)+=alpha_increment*(*cur_beta);
|
| 73 |
+
}
|
| 74 |
+
}
|
| 75 |
+
}
|
| 76 |
+
g.resize(N);
|
| 77 |
+
transform(alpha.begin(),alpha.end(),beta.begin(),g.begin(),multiplies<double>());
|
| 78 |
+
double bsum=0,esum=0,esum2;
|
| 79 |
+
for(int i=0;i<I;i++)
|
| 80 |
+
bsum+=beta[i]*net.nodeProb(i,0)*net.getAlphainit(i);
|
| 81 |
+
for(unsigned int j=0;j<(unsigned int)E.size();j++)
|
| 82 |
+
{
|
| 83 |
+
Array2<double>&e=E[j];
|
| 84 |
+
const double *epe=e.end();
|
| 85 |
+
for(const double*ep=e.begin();ep!=epe;++ep)
|
| 86 |
+
esum+=*ep;
|
| 87 |
+
}
|
| 88 |
+
if( J>1 )
|
| 89 |
+
esum2=esum/(J-1);
|
| 90 |
+
else
|
| 91 |
+
esum2=0.0;
|
| 92 |
+
if(!(esum2==0.0||mfabs(esum2-bsum)/bsum<1e-3*I))
|
| 93 |
+
cout << "ERROR2: " << esum2 <<" " <<bsum << " " << esum << net << endl;
|
| 94 |
+
double * sumptr=conv<double>(sum.begin());
|
| 95 |
+
double* ge=conv<double>(g.end());
|
| 96 |
+
for(double* gp=conv<double>(g.begin());gp!=ge;gp+=I)
|
| 97 |
+
{
|
| 98 |
+
*sumptr++=normalize_if_possible(gp,gp+I);
|
| 99 |
+
if(bsum && !(mfabs((*(sumptr-1)-bsum)/bsum)<1e-3*I))
|
| 100 |
+
cout << "ERROR: " << *(sumptr-1) << " " << bsum << " " << mfabs((*(sumptr-1)-bsum)/bsum) << ' ' << I << ' ' << J << endl;
|
| 101 |
+
}
|
| 102 |
+
for(unsigned int j=0;j<(unsigned int)E.size();j++)
|
| 103 |
+
{
|
| 104 |
+
Array2<double>&e=E[j];
|
| 105 |
+
double* epe=e.end();
|
| 106 |
+
if( esum )
|
| 107 |
+
for(double*ep=e.begin();ep!=epe;++ep)
|
| 108 |
+
*ep/=esum;
|
| 109 |
+
else
|
| 110 |
+
for(double*ep=e.begin();ep!=epe;++ep)
|
| 111 |
+
*ep/=1.0/(max(I*I,I*I*(J-1)));
|
| 112 |
+
}
|
| 113 |
+
if( sum.size() )
|
| 114 |
+
return sum[0];
|
| 115 |
+
else
|
| 116 |
+
return 1.0;
|
| 117 |
+
}
|
| 118 |
+
void HMMViterbi(const HMMNetwork&net,Array<int>&vit) {
|
| 119 |
+
const int I=net.size1(),J=net.size2();
|
| 120 |
+
vit.resize(J);
|
| 121 |
+
Array<double>g;
|
| 122 |
+
Array<Array2<double> >e(1);
|
| 123 |
+
ForwardBackwardTraining(net,g,e);
|
| 124 |
+
for(int j=0;j<J;j++) {
|
| 125 |
+
double * begin=conv<double>(g.begin())+I*j;
|
| 126 |
+
vit[j]=max_element(begin,begin+I)-begin;
|
| 127 |
+
}
|
| 128 |
+
}
|
| 129 |
+
void HMMViterbi(const HMMNetwork&net,Array<double>&g,Array<int>&vit) {
|
| 130 |
+
const int I=net.size1(),J=net.size2();
|
| 131 |
+
vit.resize(J);
|
| 132 |
+
for(int j=0;j<J;j++) {
|
| 133 |
+
double* begin=conv<double>(g.begin())+I*j;
|
| 134 |
+
vit[j]=max_element(begin,begin+I)-begin;
|
| 135 |
+
}
|
| 136 |
+
}
|
| 137 |
+
|
| 138 |
+
double HMMRealViterbi(const HMMNetwork&net,Array<int>&vitar,int pegi,int pegj,bool verbose){
|
| 139 |
+
const int I=net.size1(),J=net.size2(),N=I*J;
|
| 140 |
+
Array<double> alpha(N,-1);
|
| 141 |
+
Array<double*> bp(N,(double*)0);
|
| 142 |
+
vitar.resize(J);
|
| 143 |
+
if( J==0 )
|
| 144 |
+
return 1.0;
|
| 145 |
+
for(int i=0;i<I;i++)
|
| 146 |
+
{
|
| 147 |
+
alpha[i]=net.getAlphainit(i)*net.nodeProb(i,0);
|
| 148 |
+
if( i>I/2 )
|
| 149 |
+
alpha[i]=0; // only first empty word can be chosen
|
| 150 |
+
bp[i]=0;
|
| 151 |
+
}
|
| 152 |
+
double *cur_alpha=conv<double>(alpha.begin())+I;
|
| 153 |
+
double **cur_bp=conv<double*>(bp.begin())+I;
|
| 154 |
+
for(int j=1;j<J;j++)
|
| 155 |
+
{
|
| 156 |
+
if( pegj+1==j)
|
| 157 |
+
for(int ti=0;ti<I;ti++)
|
| 158 |
+
if( (pegi!=-1&&ti!=pegi)||(pegi==-1&&ti<I/2) )
|
| 159 |
+
(cur_alpha-I)[ti]=0.0;
|
| 160 |
+
for(int ti=0;ti<I;++ti,++cur_alpha,++cur_bp) {
|
| 161 |
+
double* prev_alpha=conv<double>(alpha.begin())+I*(j-1);
|
| 162 |
+
double this_node=net.nodeProb(ti,j);
|
| 163 |
+
const double *alprob= &net.outProb(j-1,0,ti);
|
| 164 |
+
for(int pi=0;pi<I;++pi,++prev_alpha,(alprob+=I)){
|
| 165 |
+
massert(prev_alpha<cur_alpha&& &net.outProb(j-1,pi,ti)==alprob);
|
| 166 |
+
const double alpha_increment= *prev_alpha*(*alprob)*this_node;
|
| 167 |
+
if( alpha_increment> *cur_alpha )
|
| 168 |
+
{
|
| 169 |
+
(*cur_alpha)=alpha_increment;
|
| 170 |
+
(*cur_bp)=prev_alpha;
|
| 171 |
+
}
|
| 172 |
+
}
|
| 173 |
+
}
|
| 174 |
+
}
|
| 175 |
+
for(int i=0;i<I;i++)
|
| 176 |
+
alpha[N-I+i]*=net.getBetainit(i);
|
| 177 |
+
if( pegj==J-1)
|
| 178 |
+
for(int ti=0;ti<I;ti++)
|
| 179 |
+
if( (pegi!=-1&&ti!=pegi)||(pegi==-1&&ti<I/2) )
|
| 180 |
+
(alpha)[N-I+ti]=0.0;
|
| 181 |
+
|
| 182 |
+
int j=J-1;
|
| 183 |
+
cur_alpha=conv<double>(alpha.begin())+j*I;
|
| 184 |
+
vitar[J-1]=max_element(cur_alpha,cur_alpha+I)-cur_alpha;
|
| 185 |
+
double ret= *max_element(cur_alpha,cur_alpha+I);
|
| 186 |
+
while(bp[vitar[j]+j*I])
|
| 187 |
+
{
|
| 188 |
+
cur_alpha-=I;
|
| 189 |
+
vitar[j-1]=bp[vitar[j]+j*I]-cur_alpha;
|
| 190 |
+
massert(vitar[j-1]<I&&vitar[j-1]>=0);
|
| 191 |
+
j--;
|
| 192 |
+
}
|
| 193 |
+
massert(j==0);
|
| 194 |
+
if( verbose )
|
| 195 |
+
{
|
| 196 |
+
cout << "VERB:PEG: " << pegi << ' ' << pegj << endl;
|
| 197 |
+
for(int j=0;j<J;j++)
|
| 198 |
+
cout << "NP " << net.nodeProb(vitar[j],j) << ' ' << "AP " << ((j==0)?net.getAlphainit(vitar[j]):net.outProb(j-1,vitar[j-1],vitar[j])) << " j:" << j << " i:" << vitar[j] << "; ";
|
| 199 |
+
cout << endl;
|
| 200 |
+
}
|
| 201 |
+
return ret;
|
| 202 |
+
}
|
| 203 |
+
|
| 204 |
+
double MaximumTraining(const HMMNetwork&net,Array<double>&g,Array<Array2<double> >&E){
|
| 205 |
+
Array<int> vitar;
|
| 206 |
+
double ret=HMMRealViterbi(net,vitar);
|
| 207 |
+
const int I=net.size1(),J=net.size2();
|
| 208 |
+
if( E.size()==1 )
|
| 209 |
+
{
|
| 210 |
+
Array2<double>&e=E[0];
|
| 211 |
+
e.resize(I,I);
|
| 212 |
+
g.resize(I*J);
|
| 213 |
+
fill(g.begin(),g.end(),0.0);
|
| 214 |
+
fill(e.begin(),e.end(),0.0);
|
| 215 |
+
for(int i=0;i<J;++i)
|
| 216 |
+
{
|
| 217 |
+
g[i*I+vitar[i]]=1.0;
|
| 218 |
+
if( i>0 )
|
| 219 |
+
e(vitar[i],vitar[i-1])++;
|
| 220 |
+
}
|
| 221 |
+
}
|
| 222 |
+
else
|
| 223 |
+
{
|
| 224 |
+
g.resize(I*J);
|
| 225 |
+
fill(g.begin(),g.end(),0.0);
|
| 226 |
+
for(int i=0;i<J;++i)
|
| 227 |
+
{
|
| 228 |
+
g[i*I+vitar[i]]=1.0;
|
| 229 |
+
if( i>0 )
|
| 230 |
+
{
|
| 231 |
+
Array2<double>&e=E[i-1];
|
| 232 |
+
e.resize(I,I);
|
| 233 |
+
fill(e.begin(),e.end(),0.0);
|
| 234 |
+
e(vitar[i],vitar[i-1])++;
|
| 235 |
+
}
|
| 236 |
+
}
|
| 237 |
+
}
|
| 238 |
+
return ret;
|
| 239 |
+
}
|
| 240 |
+
|
| 241 |
+
#endif
|
| 242 |
+
|
tools/giza-pp/GIZA++-v2/ForwardBackward.h
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
| 4 |
+
|
| 5 |
+
This file is part of GIZA++ ( extension of GIZA ).
|
| 6 |
+
|
| 7 |
+
This program is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU General Public License
|
| 9 |
+
as published by the Free Software Foundation; either version 2
|
| 10 |
+
of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This program is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 15 |
+
GNU General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU General Public License
|
| 18 |
+
along with this program; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 20 |
+
USA.
|
| 21 |
+
|
| 22 |
+
*/
|
| 23 |
+
#ifndef NO_EM_MARKOF_ZEUGS_DEFINED
|
| 24 |
+
#define NO_EM_MARKOF_ZEUGS_DEFINED
|
| 25 |
+
#ifndef NO_TRAINING
|
| 26 |
+
#include "myassert.h"
|
| 27 |
+
#include "Array.h"
|
| 28 |
+
#include "Array2.h"
|
| 29 |
+
|
| 30 |
+
class HMMNetwork
|
| 31 |
+
{
|
| 32 |
+
public:
|
| 33 |
+
int as,bs;
|
| 34 |
+
Array2<double> n;
|
| 35 |
+
Array<Array2<double> > e;
|
| 36 |
+
Array<double> alphainit;
|
| 37 |
+
Array<double> betainit;
|
| 38 |
+
int ab;
|
| 39 |
+
double finalMultiply;
|
| 40 |
+
HMMNetwork(int I,int J)
|
| 41 |
+
: as(I),bs(J),n(as,bs),/*e(as,as,0.0),*/e(0),alphainit(as,1.0/as),betainit(as,1.0),ab(as*bs),finalMultiply(1.0)
|
| 42 |
+
{}
|
| 43 |
+
double getAlphainit(int i)const{return alphainit[i];}
|
| 44 |
+
double getBetainit(int i)const{return betainit[i];}
|
| 45 |
+
inline int size1()const{return as;}
|
| 46 |
+
inline int size2()const{return bs;}
|
| 47 |
+
inline const double&nodeProb(int i,int j)const
|
| 48 |
+
{return n(i,j);}
|
| 49 |
+
inline const double&outProb(int j,int i1,int i2)const
|
| 50 |
+
{/*massert(e[min(int(e.size())-1,j)](i1,i2) );*/ return e[min(int(e.size())-1,j)](i1,i2);}
|
| 51 |
+
friend ostream&operator<<(ostream&out,const HMMNetwork&x)
|
| 52 |
+
{
|
| 53 |
+
return out <<"N: \n"<< x.n << endl << "E: \n" << x.e << "A:\n" << x.alphainit << "B:\n" << x.betainit << endl;
|
| 54 |
+
}
|
| 55 |
+
};
|
| 56 |
+
double ForwardBackwardTraining(const HMMNetwork&mc,Array<double>&gamma,Array<Array2<double> >&epsilon);
|
| 57 |
+
void HMMViterbi(const HMMNetwork&mc,Array<int>&vit);
|
| 58 |
+
double HMMRealViterbi(const HMMNetwork&net,Array<int>&vit,int pegi=-1,int pegj=-1,bool verbose=0);
|
| 59 |
+
double MaximumTraining(const HMMNetwork&net,Array<double>&g,Array<Array2<double> >&e);
|
| 60 |
+
void HMMViterbi(const HMMNetwork&net,Array<double>&g,Array<int>&vit);
|
| 61 |
+
#endif
|
| 62 |
+
#endif
|
tools/giza-pp/GIZA++-v2/GIZA++
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5cd1757193a60c612d5eae91cd457399e43dfa45a036fa20ec2b11cfda5915f7
|
| 3 |
+
size 1139144
|
tools/giza-pp/GIZA++-v2/GNU.GPL
ADDED
|
@@ -0,0 +1,282 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
|
| 3 |
+
Preamble
|
| 4 |
+
|
| 5 |
+
The licenses for most software are designed to take away your freedom
|
| 6 |
+
to share and change it. By contrast, the GNU General Public License is
|
| 7 |
+
intended to guarantee your freedom to share and change free
|
| 8 |
+
software--to make sure the software is free for all its users. This
|
| 9 |
+
General Public License applies to most of the Free Software
|
| 10 |
+
Foundation's software and to any other program whose authors commit to
|
| 11 |
+
using it. (Some other Free Software Foundation software is covered by
|
| 12 |
+
the GNU Library General Public License instead.) You can apply it to
|
| 13 |
+
your programs, too.
|
| 14 |
+
|
| 15 |
+
When we speak of free software, we are referring to freedom, not
|
| 16 |
+
price. Our General Public Licenses are designed to make sure that you
|
| 17 |
+
have the freedom to distribute copies of free software (and charge for
|
| 18 |
+
this service if you wish), that you receive source code or can get it
|
| 19 |
+
if you want it, that you can change the software or use pieces of it
|
| 20 |
+
in new free programs; and that you know you can do these things.
|
| 21 |
+
|
| 22 |
+
To protect your rights, we need to make restrictions that forbid
|
| 23 |
+
anyone to deny you these rights or to ask you to surrender the
|
| 24 |
+
rights. These restrictions translate to certain responsibilities for
|
| 25 |
+
you if you distribute copies of the software, or if you modify it.
|
| 26 |
+
|
| 27 |
+
For example, if you distribute copies of such a program, whether
|
| 28 |
+
gratis or for a fee, you must give the recipients all the rights that
|
| 29 |
+
you have. You must make sure that they, too, receive or can get the
|
| 30 |
+
source code. And you must show them these terms so they know their
|
| 31 |
+
rights.
|
| 32 |
+
|
| 33 |
+
We protect your rights with two steps: (1) copyright the software, and
|
| 34 |
+
(2) offer you this license which gives you legal permission to copy,
|
| 35 |
+
distribute and/or modify the software.
|
| 36 |
+
|
| 37 |
+
Also, for each author's protection and ours, we want to make certain
|
| 38 |
+
that everyone understands that there is no warranty for this free
|
| 39 |
+
software. If the software is modified by someone else and passed on,
|
| 40 |
+
we want its recipients to know that what they have is not the
|
| 41 |
+
original, so that any problems introduced by others will not reflect
|
| 42 |
+
on the original authors' reputations.
|
| 43 |
+
|
| 44 |
+
Finally, any free program is threatened constantly by software
|
| 45 |
+
patents. We wish to avoid the danger that redistributors of a free
|
| 46 |
+
program will individually obtain patent licenses, in effect making the
|
| 47 |
+
program proprietary. To prevent this, we have made it clear that any
|
| 48 |
+
patent must be licensed for everyone's free use or not licensed at
|
| 49 |
+
all.
|
| 50 |
+
|
| 51 |
+
The precise terms and conditions for copying, distribution and
|
| 52 |
+
modification follow.
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
| 56 |
+
|
| 57 |
+
0. This License applies to any program or other work which contains a
|
| 58 |
+
notice placed by the copyright holder saying it may be distributed
|
| 59 |
+
under the terms of this General Public License. The "Program", below,
|
| 60 |
+
refers to any such program or work, and a "work based on the Program"
|
| 61 |
+
means either the Program or any derivative work under copyright law:
|
| 62 |
+
that is to say, a work containing the Program or a portion of it,
|
| 63 |
+
either verbatim or with modifications and/or translated into another
|
| 64 |
+
language. (Hereinafter, translation is included without limitation in
|
| 65 |
+
the term "modification".) Each licensee is addressed as "you".
|
| 66 |
+
|
| 67 |
+
Activities other than copying, distribution and modification are not
|
| 68 |
+
covered by this License; they are outside its scope. The act of
|
| 69 |
+
running the Program is not restricted, and the output from the Program
|
| 70 |
+
is covered only if its contents constitute a work based on the Program
|
| 71 |
+
(independent of having been made by running the Program). Whether that
|
| 72 |
+
is true depends on what the Program does.
|
| 73 |
+
|
| 74 |
+
1. You may copy and distribute verbatim copies of the Program's source
|
| 75 |
+
code as you receive it, in any medium, provided that you conspicuously
|
| 76 |
+
and appropriately publish on each copy an appropriate copyright notice
|
| 77 |
+
and disclaimer of warranty; keep intact all the notices that refer to
|
| 78 |
+
this License and to the absence of any warranty; and give any other
|
| 79 |
+
recipients of the Program a copy of this License along with the
|
| 80 |
+
Program.
|
| 81 |
+
|
| 82 |
+
You may charge a fee for the physical act of transferring a copy, and
|
| 83 |
+
you may at your option offer warranty protection in exchange for a
|
| 84 |
+
fee.
|
| 85 |
+
|
| 86 |
+
2. You may modify your copy or copies of the Program or any portion of
|
| 87 |
+
it, thus forming a work based on the Program, and copy and distribute
|
| 88 |
+
such modifications or work under the terms of Section 1 above,
|
| 89 |
+
provided that you also meet all of these conditions:
|
| 90 |
+
|
| 91 |
+
a) You must cause the modified files to carry prominent notices
|
| 92 |
+
stating that you changed the files and the date of any change.
|
| 93 |
+
|
| 94 |
+
b) You must cause any work that you distribute or publish, that
|
| 95 |
+
in whole or in part contains or is derived from the Program or
|
| 96 |
+
any part thereof, to be licensed as a whole at no charge to all
|
| 97 |
+
third parties under the terms of this License.
|
| 98 |
+
|
| 99 |
+
c) If the modified program normally reads commands interactively
|
| 100 |
+
when run, you must cause it, when started running for such
|
| 101 |
+
interactive use in the most ordinary way, to print or display an
|
| 102 |
+
announcement including an appropriate copyright notice and a
|
| 103 |
+
notice that there is no warranty (or else, saying that you
|
| 104 |
+
provide a warranty) and that users may redistribute the program
|
| 105 |
+
under these conditions, and telling the user how to view a copy
|
| 106 |
+
of this License. (Exception: if the Program itself is interactive
|
| 107 |
+
but does not normally print such an announcement, your work based
|
| 108 |
+
on the Program is not required to print an announcement.)
|
| 109 |
+
|
| 110 |
+
These requirements apply to the modified work as a whole. If
|
| 111 |
+
identifiable sections of that work are not derived from the Program,
|
| 112 |
+
and can be reasonably considered independent and separate works in
|
| 113 |
+
themselves, then this License, and its terms, do not apply to those
|
| 114 |
+
sections when you distribute them as separate works. But when you
|
| 115 |
+
distribute the same sections as part of a whole which is a work based
|
| 116 |
+
on the Program, the distribution of the whole must be on the terms of
|
| 117 |
+
this License, whose permissions for other licensees extend to the
|
| 118 |
+
entire whole, and thus to each and every part regardless of who wrote
|
| 119 |
+
it.
|
| 120 |
+
|
| 121 |
+
Thus, it is not the intent of this section to claim rights or contest
|
| 122 |
+
your rights to work written entirely by you; rather, the intent is to
|
| 123 |
+
exercise the right to control the distribution of derivative or
|
| 124 |
+
collective works based on the Program.
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
In addition, mere aggregation of another work not based on the Program
|
| 128 |
+
with the Program (or with a work based on the Program) on a volume of
|
| 129 |
+
a storage or distribution medium does not bring the other work under
|
| 130 |
+
the scope of this License.
|
| 131 |
+
|
| 132 |
+
3. You may copy and distribute the Program (or a work based on it,
|
| 133 |
+
under Section 2) in object code or executable form under the terms of
|
| 134 |
+
Sections 1 and 2 above provided that you also do one of the following:
|
| 135 |
+
|
| 136 |
+
a) Accompany it with the complete corresponding machine-readable
|
| 137 |
+
source code, which must be distributed under the terms of
|
| 138 |
+
Sections 1 and 2 above on a medium customarily used for software
|
| 139 |
+
interchange; or,
|
| 140 |
+
|
| 141 |
+
b) Accompany it with a written offer, valid for at least three
|
| 142 |
+
years, to give any third party, for a charge no more than your
|
| 143 |
+
cost of physically performing source distribution, a complete
|
| 144 |
+
machine-readable copy of the corresponding source code, to be
|
| 145 |
+
distributed under the terms of Sections 1 and 2 above on a medium
|
| 146 |
+
customarily used for software interchange; or,
|
| 147 |
+
|
| 148 |
+
c) Accompany it with the information you received as to the offer
|
| 149 |
+
to distribute corresponding source code. (This alternative is
|
| 150 |
+
allowed only for noncommercial distribution and only if you
|
| 151 |
+
received the program in object code or executable form with such
|
| 152 |
+
an offer, in accord with Subsection b above.)
|
| 153 |
+
|
| 154 |
+
The source code for a work means the preferred form of the work for
|
| 155 |
+
making modifications to it. For an executable work, complete source
|
| 156 |
+
code means all the source code for all modules it contains, plus any
|
| 157 |
+
associated interface definition files, plus the scripts used to
|
| 158 |
+
control compilation and installation of the executable. However, as a
|
| 159 |
+
special exception, the source code distributed need not include
|
| 160 |
+
anything that is normally distributed (in either source or binary
|
| 161 |
+
form) with the major components (compiler, kernel, and so on) of the
|
| 162 |
+
operating system on which the executable runs, unless that component
|
| 163 |
+
itself accompanies the executable.
|
| 164 |
+
|
| 165 |
+
If distribution of executable or object code is made by offering
|
| 166 |
+
access to copy from a designated place, then offering equivalent
|
| 167 |
+
access to copy the source code from the same place counts as
|
| 168 |
+
distribution of the source code, even though third parties are not
|
| 169 |
+
compelled to copy the source along with the object code.
|
| 170 |
+
|
| 171 |
+
4. You may not copy, modify, sublicense, or distribute the Program
|
| 172 |
+
except as expressly provided under this License. Any attempt otherwise
|
| 173 |
+
to copy, modify, sublicense or distribute the Program is void, and
|
| 174 |
+
will automatically terminate your rights under this License. However,
|
| 175 |
+
parties who have received copies, or rights, from you under this
|
| 176 |
+
License will not have their licenses terminated so long as such
|
| 177 |
+
parties remain in full compliance.
|
| 178 |
+
|
| 179 |
+
5. You are not required to accept this License, since you have not
|
| 180 |
+
signed it. However, nothing else grants you permission to modify or
|
| 181 |
+
distribute the Program or its derivative works. These actions are
|
| 182 |
+
prohibited by law if you do not accept this License. Therefore, by
|
| 183 |
+
modifying or distributing the Program (or any work based on the
|
| 184 |
+
Program), you indicate your acceptance of this License to do so, and
|
| 185 |
+
all its terms and conditions for copying, distributing or modifying
|
| 186 |
+
the Program or works based on it.
|
| 187 |
+
|
| 188 |
+
6. Each time you redistribute the Program (or any work based on the
|
| 189 |
+
Program), the recipient automatically receives a license from the
|
| 190 |
+
original licensor to copy, distribute or modify the Program subject to
|
| 191 |
+
these terms and conditions. You may not impose any further
|
| 192 |
+
restrictions on the recipients' exercise of the rights granted
|
| 193 |
+
herein. You are not responsible for enforcing compliance by third
|
| 194 |
+
parties to this License.
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
7. If, as a consequence of a court judgment or allegation of patent
|
| 198 |
+
infringement or for any other reason (not limited to patent issues),
|
| 199 |
+
conditions are imposed on you (whether by court order, agreement or
|
| 200 |
+
otherwise) that contradict the conditions of this License, they do not
|
| 201 |
+
excuse you from the conditions of this License. If you cannot
|
| 202 |
+
distribute so as to satisfy simultaneously your obligations under this
|
| 203 |
+
License and any other pertinent obligations, then as a consequence you
|
| 204 |
+
may not distribute the Program at all. For example, if a patent
|
| 205 |
+
license would not permit royalty-free redistribution of the Program by
|
| 206 |
+
all those who receive copies directly or indirectly through you, then
|
| 207 |
+
the only way you could satisfy both it and this License would be to
|
| 208 |
+
refrain entirely from distribution of the Program.
|
| 209 |
+
|
| 210 |
+
If any portion of this section is held invalid or unenforceable under
|
| 211 |
+
any particular circumstance, the balance of the section is intended to
|
| 212 |
+
apply and the section as a whole is intended to apply in other
|
| 213 |
+
circumstances.
|
| 214 |
+
|
| 215 |
+
It is not the purpose of this section to induce you to infringe any
|
| 216 |
+
patents or other property right claims or to contest validity of any
|
| 217 |
+
such claims; this section has the sole purpose of protecting the
|
| 218 |
+
integrity of the free software distribution system, which is
|
| 219 |
+
implemented by public license practices. Many people have made
|
| 220 |
+
generous contributions to the wide range of software distributed
|
| 221 |
+
through that system in reliance on consistent application of that
|
| 222 |
+
system; it is up to the author/donor to decide if he or she is willing
|
| 223 |
+
to distribute software through any other system and a licensee cannot
|
| 224 |
+
impose that choice.
|
| 225 |
+
|
| 226 |
+
This section is intended to make thoroughly clear what is believed to
|
| 227 |
+
be a consequence of the rest of this License.
|
| 228 |
+
|
| 229 |
+
8. If the distribution and/or use of the Program is restricted in
|
| 230 |
+
certain countries either by patents or by copyrighted interfaces, the
|
| 231 |
+
original copyright holder who places the Program under this License
|
| 232 |
+
may add an explicit geographical distribution limitation excluding
|
| 233 |
+
those countries, so that distribution is permitted only in or among
|
| 234 |
+
countries not thus excluded. In such case, this License incorporates
|
| 235 |
+
the limitation as if written in the body of this License.
|
| 236 |
+
|
| 237 |
+
9. The Free Software Foundation may publish revised and/or new
|
| 238 |
+
versions of the General Public License from time to time. Such new
|
| 239 |
+
versions will be similar in spirit to the present version, but may
|
| 240 |
+
differ in detail to address new problems or concerns.
|
| 241 |
+
|
| 242 |
+
Each version is given a distinguishing version number. If the Program
|
| 243 |
+
specifies a version number of this License which applies to it and
|
| 244 |
+
"any later version", you have the option of following the terms and
|
| 245 |
+
conditions either of that version or of any later version published by
|
| 246 |
+
the Free Software Foundation. If the Program does not specify a
|
| 247 |
+
version number of this License, you may choose any version ever
|
| 248 |
+
published by the Free Software Foundation.
|
| 249 |
+
|
| 250 |
+
10. If you wish to incorporate parts of the Program into other free
|
| 251 |
+
programs whose distribution conditions are different, write to the
|
| 252 |
+
author to ask for permission. For software which is copyrighted by the
|
| 253 |
+
Free Software Foundation, write to the Free Software Foundation; we
|
| 254 |
+
sometimes make exceptions for this. Our decision will be guided by the
|
| 255 |
+
two goals of preserving the free status of all derivatives of our free
|
| 256 |
+
software and of promoting the sharing and reuse of software generally.
|
| 257 |
+
|
| 258 |
+
NO WARRANTY
|
| 259 |
+
|
| 260 |
+
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO
|
| 261 |
+
WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE
|
| 262 |
+
LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS
|
| 263 |
+
AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF
|
| 264 |
+
ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
|
| 265 |
+
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
| 266 |
+
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
|
| 267 |
+
PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME
|
| 268 |
+
THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
|
| 269 |
+
|
| 270 |
+
|
| 271 |
+
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
|
| 272 |
+
WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
|
| 273 |
+
AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU
|
| 274 |
+
FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
|
| 275 |
+
CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
|
| 276 |
+
PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
|
| 277 |
+
RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
|
| 278 |
+
FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF
|
| 279 |
+
SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
|
| 280 |
+
DAMAGES.
|
| 281 |
+
|
| 282 |
+
END OF TERMS AND CONDITIONS
|
tools/giza-pp/GIZA++-v2/Globals.h
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
EGYPT Toolkit for Statistical Machine Translation
|
| 4 |
+
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
| 5 |
+
|
| 6 |
+
This program is free software; you can redistribute it and/or
|
| 7 |
+
modify it under the terms of the GNU General Public License
|
| 8 |
+
as published by the Free Software Foundation; either version 2
|
| 9 |
+
of the License, or (at your option) any later version.
|
| 10 |
+
|
| 11 |
+
This program is distributed in the hope that it will be useful,
|
| 12 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 13 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 14 |
+
GNU General Public License for more details.
|
| 15 |
+
|
| 16 |
+
You should have received a copy of the GNU General Public License
|
| 17 |
+
along with this program; if not, write to the Free Software
|
| 18 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 19 |
+
USA.
|
| 20 |
+
|
| 21 |
+
*/
|
| 22 |
+
#ifndef Globals_asdf_defined
|
| 23 |
+
#define Globals_asdf_defined
|
| 24 |
+
#include <string>
|
| 25 |
+
#include <fstream>
|
| 26 |
+
#include <map>
|
| 27 |
+
#include "defs.h"
|
| 28 |
+
#include "Vector.h"
|
| 29 |
+
|
| 30 |
+
extern float PROB_SMOOTH;
|
| 31 |
+
extern bool Verbose, Log, Peg, Transfer, Transfer2to3, useDict ;
|
| 32 |
+
extern string Prefix, LogFilename, OPath,
|
| 33 |
+
SourceVocabFilename, TargetVocabFilename, CorpusFilename, TestCorpusFilename,
|
| 34 |
+
t_Filename, a_Filename, p0_Filename, d_Filename, n_Filename, dictionary_Filename;
|
| 35 |
+
extern ofstream logmsg ;
|
| 36 |
+
extern double M5P0,P0 ;
|
| 37 |
+
extern bool NODUMPS, FEWDUMPS ;
|
| 38 |
+
extern string Usage ;
|
| 39 |
+
extern unsigned int MAX_SENTENCE_LENGTH ;
|
| 40 |
+
extern int PegUntil;
|
| 41 |
+
|
| 42 |
+
extern short DeficientDistortionForEmptyWord;
|
| 43 |
+
|
| 44 |
+
extern int M4_Dependencies;
|
| 45 |
+
extern int M5_Dependencies;
|
| 46 |
+
|
| 47 |
+
extern short OutputInAachenFormat;
|
| 48 |
+
|
| 49 |
+
#define DEP_MODEL_l 1
|
| 50 |
+
#define DEP_MODEL_m 2
|
| 51 |
+
#define DEP_MODEL_F 4
|
| 52 |
+
#define DEP_MODEL_E 8
|
| 53 |
+
|
| 54 |
+
#define DEP_MODELb_l 16
|
| 55 |
+
#define DEP_MODELb_m 32
|
| 56 |
+
#define DEP_MODELb_F 64
|
| 57 |
+
#define DEP_MODELb_E 128
|
| 58 |
+
|
| 59 |
+
#define DEP_SUM 256
|
| 60 |
+
|
| 61 |
+
class vcbList;
|
| 62 |
+
|
| 63 |
+
extern vcbList *globeTrainVcbList, *globfTrainVcbList;
|
| 64 |
+
|
| 65 |
+
extern short PredictionInAlignments;
|
| 66 |
+
extern short SmoothHMM;
|
| 67 |
+
#define VERB Verbose
|
| 68 |
+
|
| 69 |
+
double ErrorsInAlignment(const map< pair<int,int>,char >&reference,const Vector<WordIndex>&test,int l,int&missing,int&toomuch,int&eventsMissing,int&eventsToomuch,int);
|
| 70 |
+
extern Vector<map< pair<int,int>,char > > ReferenceAlignment;
|
| 71 |
+
void printGIZAPars(ostream&out);
|
| 72 |
+
|
| 73 |
+
#endif
|
tools/giza-pp/GIZA++-v2/HMMTables.cpp
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
Copyright (C) 1998,1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
| 4 |
+
|
| 5 |
+
This file is part of GIZA++ ( extension of GIZA ).
|
| 6 |
+
|
| 7 |
+
This program is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU General Public License
|
| 9 |
+
as published by the Free Software Foundation; either version 2
|
| 10 |
+
of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This program is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 15 |
+
GNU General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU General Public License
|
| 18 |
+
along with this program; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 20 |
+
USA.
|
| 21 |
+
|
| 22 |
+
*/
|
| 23 |
+
#include "HMMTables.h"
|
| 24 |
+
#include <fstream>
|
| 25 |
+
#include "Globals.h"
|
| 26 |
+
#include "Parameter.h"
|
| 27 |
+
|
| 28 |
+
template<class CLS,class MAPPERCLASSTOSTRING>
|
| 29 |
+
void HMMTables<CLS,MAPPERCLASSTOSTRING>::writeJumps(ostream&out) const
|
| 30 |
+
{
|
| 31 |
+
double ssum=0.0;
|
| 32 |
+
for(typename map<AlDeps<CLS>,FlexArray<double> >::const_iterator i=alProb.begin();i!=alProb.end();++i)
|
| 33 |
+
{
|
| 34 |
+
double sum=0.0;
|
| 35 |
+
out << "\n\nDistribution for: ";
|
| 36 |
+
printAlDeps(out,i->first,*mapper1,*mapper2);
|
| 37 |
+
out << ' ';
|
| 38 |
+
for(int a=i->second.low();a<=i->second.high();++a)
|
| 39 |
+
if( i->second[a] )
|
| 40 |
+
{
|
| 41 |
+
out << a << ':' << i->second[a] << ';' << ' ';
|
| 42 |
+
sum+=i->second[a];
|
| 43 |
+
}
|
| 44 |
+
out << '\n' << '\n';
|
| 45 |
+
out << "SUM: " << sum << '\n';
|
| 46 |
+
ssum+=sum;
|
| 47 |
+
}
|
| 48 |
+
out << "FULL-SUM: " << ssum << '\n';
|
| 49 |
+
}
|
| 50 |
+
template<class CLS,class MAPPERCLASSTOSTRING>
|
| 51 |
+
void HMMTables<CLS,MAPPERCLASSTOSTRING>::readJumps(istream&)
|
| 52 |
+
{
|
| 53 |
+
}
|
| 54 |
+
template<class CLS,class MAPPERCLASSTOSTRING>
|
| 55 |
+
double HMMTables<CLS,MAPPERCLASSTOSTRING>::getAlProb(int istrich,int k,int sentLength,int J,CLS w1,CLS w2,int j,int iter) const
|
| 56 |
+
{
|
| 57 |
+
massert(k<sentLength&&k>=0);
|
| 58 |
+
massert(istrich<sentLength&&istrich>=-1);
|
| 59 |
+
int pos=istrich-k;
|
| 60 |
+
switch(PredictionInAlignments)
|
| 61 |
+
{
|
| 62 |
+
case 0: pos=istrich-k; break;
|
| 63 |
+
case 1: pos=k; break;
|
| 64 |
+
case 2:
|
| 65 |
+
pos=(k*J-j*sentLength);
|
| 66 |
+
if( pos>0 ) pos+=J/2; else pos-=J/2;
|
| 67 |
+
pos/=J;
|
| 68 |
+
break;
|
| 69 |
+
default:abort();
|
| 70 |
+
}
|
| 71 |
+
typename map<AlDeps<CLS>,FlexArray<double> >::const_iterator p=alProb.find(AlDeps<CLS>(sentLength,istrich,j,w1,w2));
|
| 72 |
+
if( p!=alProb.end() )
|
| 73 |
+
{
|
| 74 |
+
return (p->second)[pos];
|
| 75 |
+
}
|
| 76 |
+
else
|
| 77 |
+
{
|
| 78 |
+
if( iter>0&&iter<5000 )
|
| 79 |
+
cout << "WARNING: Not found: " << ' ' << J << ' ' << sentLength << '\n';;
|
| 80 |
+
return 1.0/(2*sentLength-1);
|
| 81 |
+
}
|
| 82 |
+
}
|
| 83 |
+
|
| 84 |
+
template<class CLS,class MAPPERCLASSTOSTRING>
|
| 85 |
+
void HMMTables<CLS,MAPPERCLASSTOSTRING>::addAlCount(int istrich,int k,int sentLength,int J,CLS w1,CLS w2,int j,double value,double valuePredicted)
|
| 86 |
+
{
|
| 87 |
+
int pos=istrich-k;
|
| 88 |
+
switch(PredictionInAlignments)
|
| 89 |
+
{
|
| 90 |
+
case 0: pos=istrich-k; break;
|
| 91 |
+
case 1: pos=k; break;
|
| 92 |
+
case 2:
|
| 93 |
+
pos=(k*J-j*sentLength);
|
| 94 |
+
if( pos>0 ) pos+=J/2; else pos-=J/2;
|
| 95 |
+
pos/=J;
|
| 96 |
+
break;
|
| 97 |
+
default:abort();
|
| 98 |
+
}
|
| 99 |
+
AlDeps<CLS> deps(AlDeps<CLS>(sentLength,istrich,j,w1,w2));
|
| 100 |
+
|
| 101 |
+
{
|
| 102 |
+
typename map<AlDeps<CLS>,FlexArray<double> >::iterator p=alProb.find(deps);
|
| 103 |
+
if( p==alProb.end() )
|
| 104 |
+
{
|
| 105 |
+
if( (CompareAlDeps&1)==0 )
|
| 106 |
+
p=alProb.insert(make_pair(deps,FlexArray<double> (-MAX_SENTENCE_LENGTH,MAX_SENTENCE_LENGTH,0.0))).first;
|
| 107 |
+
else
|
| 108 |
+
p=alProb.insert(make_pair(deps,FlexArray<double> (-sentLength,sentLength,0.0))).first;
|
| 109 |
+
}
|
| 110 |
+
p->second[pos]+=value;
|
| 111 |
+
}
|
| 112 |
+
|
| 113 |
+
if( valuePredicted )
|
| 114 |
+
{
|
| 115 |
+
typename map<AlDeps<CLS>,FlexArray<double> >::iterator p=alProbPredicted.find(deps);
|
| 116 |
+
if( p==alProbPredicted.end() )
|
| 117 |
+
{
|
| 118 |
+
if( (CompareAlDeps&1)==0 )
|
| 119 |
+
p=alProbPredicted.insert(make_pair(deps,FlexArray<double> (-MAX_SENTENCE_LENGTH,MAX_SENTENCE_LENGTH,0.0))).first;
|
| 120 |
+
else
|
| 121 |
+
p=alProbPredicted.insert(make_pair(deps,FlexArray<double> (-sentLength,sentLength,0.0))).first;
|
| 122 |
+
}
|
| 123 |
+
p->second[pos]+=valuePredicted;
|
| 124 |
+
}
|
| 125 |
+
}
|
| 126 |
+
|
| 127 |
+
template<class CLS,class MAPPERCLASSTOSTRING>
|
| 128 |
+
Array<double>&HMMTables<CLS,MAPPERCLASSTOSTRING>::doGetAlphaInit(int I)
|
| 129 |
+
{
|
| 130 |
+
if( !init_alpha.count(I) )
|
| 131 |
+
init_alpha[I]=Array<double>(I,0);
|
| 132 |
+
return init_alpha[I];
|
| 133 |
+
}
|
| 134 |
+
template<class CLS,class MAPPERCLASSTOSTRING>
|
| 135 |
+
Array<double>&HMMTables<CLS,MAPPERCLASSTOSTRING>::doGetBetaInit(int I)
|
| 136 |
+
{
|
| 137 |
+
if( !init_beta.count(I) )
|
| 138 |
+
init_beta[I]=Array<double>(I,0);
|
| 139 |
+
return init_beta[I];
|
| 140 |
+
}
|
| 141 |
+
|
| 142 |
+
template<class CLS,class MAPPERCLASSTOSTRING>
|
| 143 |
+
bool HMMTables<CLS,MAPPERCLASSTOSTRING>::getAlphaInit(int I,Array<double>&x)const
|
| 144 |
+
{
|
| 145 |
+
hash_map<int,Array<double> >::const_iterator i=init_alpha.find(I);
|
| 146 |
+
if( i==init_alpha.end() )
|
| 147 |
+
return 0;
|
| 148 |
+
else
|
| 149 |
+
{
|
| 150 |
+
x=i->second;
|
| 151 |
+
for(unsigned int j=x.size()/2+1;j<x.size();++j) // only first empty word can be chosen
|
| 152 |
+
x[j]=0;
|
| 153 |
+
return 1;
|
| 154 |
+
}
|
| 155 |
+
}
|
| 156 |
+
template<class CLS,class MAPPERCLASSTOSTRING>
|
| 157 |
+
bool HMMTables<CLS,MAPPERCLASSTOSTRING>::getBetaInit(int I,Array<double>&x)const
|
| 158 |
+
{
|
| 159 |
+
hash_map<int,Array<double> >::const_iterator i=init_beta.find(I);
|
| 160 |
+
if( i==init_beta.end() )
|
| 161 |
+
return 0;
|
| 162 |
+
else
|
| 163 |
+
{
|
| 164 |
+
x=i->second;
|
| 165 |
+
return 1;
|
| 166 |
+
}
|
| 167 |
+
}
|
| 168 |
+
|
| 169 |
+
template<class CLS,class MAPPERCLASSTOSTRING>
|
| 170 |
+
HMMTables<CLS,MAPPERCLASSTOSTRING>:: HMMTables(double _probForEmpty,const MAPPERCLASSTOSTRING&m1,const MAPPERCLASSTOSTRING&m2):
|
| 171 |
+
probabilityForEmpty(mfabs(_probForEmpty)),
|
| 172 |
+
updateProbabilityForEmpty(_probForEmpty<0.0),
|
| 173 |
+
mapper1(&m1),
|
| 174 |
+
mapper2(&m2)
|
| 175 |
+
{}
|
| 176 |
+
template<class CLS,class MAPPERCLASSTOSTRING>
|
| 177 |
+
HMMTables<CLS,MAPPERCLASSTOSTRING>::~HMMTables() {}
|
tools/giza-pp/GIZA++-v2/HMMTables.h
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
| 4 |
+
|
| 5 |
+
This file is part of GIZA++ ( extension of GIZA ).
|
| 6 |
+
|
| 7 |
+
This program is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU General Public License
|
| 9 |
+
as published by the Free Software Foundation; either version 2
|
| 10 |
+
of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This program is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 15 |
+
GNU General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU General Public License
|
| 18 |
+
along with this program; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 20 |
+
USA.
|
| 21 |
+
|
| 22 |
+
*/
|
| 23 |
+
#ifndef HMM_TABLES_H_ASDF_DEFINED
|
| 24 |
+
#define HMM_TABLES_H_ASDF_DEFINED
|
| 25 |
+
#include "FlexArray.h"
|
| 26 |
+
|
| 27 |
+
#include "Array.h"
|
| 28 |
+
#include <map>
|
| 29 |
+
#include "mymath.h"
|
| 30 |
+
|
| 31 |
+
// Normalizes the values in the half-open range [a,b) in place and returns the
// sum the range had before normalization.
//  - If that sum is non-zero, every element is divided by it.
//  - If it is zero, every element is set to 1.0/(b-a) instead.
//  - An empty range (a==b) is left alone and 0 is returned, so the fill value
//    1.0/0 is never evaluated.
// std::fill is qualified explicitly so that this header template does not
// depend on a using-directive being in effect where it is used.
template<class T>
T normalize_if_possible(T*a,T*b)
{
  T sum=0;
  if( a==b )
    return sum;
  for(T*i=a;i!=b;++i)
    sum+=*i;
  if( sum )
    for(T*i=a;i!=b;++i)
      *i/=sum;
  else
    std::fill(a,b,1.0/(b-a));
  return sum;
}
|
| 44 |
+
|
| 45 |
+
// Bit mask (declared here, defined in another translation unit) that selects
// which AlDeps fields take part in comparisons:
//   1 = englishSentenceLength, 2 = classPrevious, 4 = previous,
//   8 = j, 16 = Cj.
extern short CompareAlDeps;

// Key type for the alignment tables: bundles the values an alignment
// distribution may be conditioned on. Ordering and equality only look at the
// fields whose bit is set in CompareAlDeps, so two keys that differ only in
// masked-out fields compare equal.
template<class CLS>
class AlDeps
{
 public:
  int englishSentenceLength;
  CLS classPrevious;
  int previous;
  int j;
  CLS Cj;

  // Note the argument order: (length, previous, j, classPrevious, Cj).
  AlDeps(int l,int p=0,int _j=0,CLS s1=0,CLS _Cj=0)
    : englishSentenceLength(l),classPrevious(s1),previous(p),j(_j),Cj(_Cj)
  {}

  // Lexicographic "less than" over the enabled fields, in the order
  // sentence length, previous class, previous position, j, Cj.
  friend bool operator<(const AlDeps&x,const AlDeps&y)
  {
    if( CompareAlDeps&1 )
      {
        if( x.englishSentenceLength<y.englishSentenceLength ) return true;
        if( y.englishSentenceLength<x.englishSentenceLength ) return false;
      }
    if( CompareAlDeps&2 )
      {
        if( x.classPrevious<y.classPrevious ) return true;
        if( y.classPrevious<x.classPrevious ) return false;
      }
    if( CompareAlDeps&4 )
      {
        if( x.previous<y.previous ) return true;
        if( y.previous<x.previous ) return false;
      }
    if( CompareAlDeps&8 )
      {
        if( x.j<y.j ) return true;
        if( y.j<x.j ) return false;
      }
    if( CompareAlDeps&16 )
      {
        if( x.Cj<y.Cj ) return true;
        if( y.Cj<x.Cj ) return false;
      }
    // All enabled fields are equivalent.
    return false;
  }

  // Two keys are equal when neither orders before the other.
  friend bool operator==(const AlDeps&x,const AlDeps&y)
  {
    if( x<y )
      return false;
    return !(y<x);
  }
};
|
| 75 |
+
|
| 76 |
+
template<class CLS>
|
| 77 |
+
class Hash_AlDeps
|
| 78 |
+
{
|
| 79 |
+
public:
|
| 80 |
+
unsigned
|
| 81 |
+
int
|
| 82 |
+
operator()
|
| 83 |
+
(const AlDeps<CLS>&x)
|
| 84 |
+
const
|
| 85 |
+
{
|
| 86 |
+
unsigned int hash=0;
|
| 87 |
+
if( (CompareAlDeps&1) ) { hash=hash+x.englishSentenceLength;hash*=31;}
|
| 88 |
+
if( (CompareAlDeps&2) ) { hash=hash+x.classPrevious;hash*=31;}
|
| 89 |
+
if( (CompareAlDeps&4) ) { hash=hash+x.previous;hash*=31;}
|
| 90 |
+
if( (CompareAlDeps&8) ) { hash=hash+x.j;hash*=31;}
|
| 91 |
+
if( (CompareAlDeps&16) ) { hash=hash+x.Cj;hash*=31;}
|
| 92 |
+
return hash;
|
| 93 |
+
|
| 94 |
+
}
|
| 95 |
+
};
|
| 96 |
+
|
| 97 |
+
template<class CLS,class MAPPERCLASSTOSTRING>
|
| 98 |
+
class HMMTables
|
| 99 |
+
{
|
| 100 |
+
protected:
|
| 101 |
+
double probabilityForEmpty;
|
| 102 |
+
bool updateProbabilityForEmpty;
|
| 103 |
+
hash_map<int,Array<double> > init_alpha;
|
| 104 |
+
hash_map<int,Array<double> > init_beta;
|
| 105 |
+
map<AlDeps<CLS>,FlexArray<double> > alProb;
|
| 106 |
+
map<AlDeps<CLS>,FlexArray<double> > alProbPredicted;
|
| 107 |
+
int globalCounter;
|
| 108 |
+
double divSum;
|
| 109 |
+
double p0_count,np0_count;
|
| 110 |
+
const MAPPERCLASSTOSTRING*mapper1;
|
| 111 |
+
const MAPPERCLASSTOSTRING*mapper2;
|
| 112 |
+
public:
|
| 113 |
+
const HMMTables<CLS,MAPPERCLASSTOSTRING>*getThis()const {return this;}
|
| 114 |
+
HMMTables(double _probForEmpty,const MAPPERCLASSTOSTRING&m1,const MAPPERCLASSTOSTRING&m2);
|
| 115 |
+
virtual ~HMMTables();
|
| 116 |
+
virtual double getAlProb(int i,int k,int sentLength,int J,CLS w1,CLS w2,int j,int iter=0) const;
|
| 117 |
+
virtual void writeJumps(ostream&) const;
|
| 118 |
+
void addAlCount(int i,int k,int sentLength,int J,CLS w1,CLS w2,int j,double value,double valuePredicted);
|
| 119 |
+
virtual void readJumps(istream&);
|
| 120 |
+
virtual bool getAlphaInit(int I,Array<double>&x)const;
|
| 121 |
+
virtual bool getBetaInit(int I,Array<double>&x)const;
|
| 122 |
+
Array<double>&doGetAlphaInit(int I);
|
| 123 |
+
Array<double>&doGetBetaInit(int I);
|
| 124 |
+
virtual double getProbabilityForEmpty()const
|
| 125 |
+
{return probabilityForEmpty;}
|
| 126 |
+
void performGISIteration(const HMMTables<CLS,MAPPERCLASSTOSTRING>*old)
|
| 127 |
+
{
|
| 128 |
+
cout << "OLDSIZE: " << (old?(old->alProb.size()):0) << " NEWSIZE:"<< alProb.size()<< endl;
|
| 129 |
+
for(typename map<AlDeps<CLS>,FlexArray<double> >::iterator i=alProb.begin();i!=alProb.end();++i)
|
| 130 |
+
{
|
| 131 |
+
if( alProbPredicted.count(i->first))
|
| 132 |
+
{
|
| 133 |
+
normalize_if_possible(i->second.begin(),i->second.end());
|
| 134 |
+
normalize_if_possible(alProbPredicted[i->first].begin(),alProbPredicted[i->first].end());
|
| 135 |
+
for(int j=i->second.low();j<=i->second.high();++j)
|
| 136 |
+
{
|
| 137 |
+
if( i->second[j] )
|
| 138 |
+
if(alProbPredicted[i->first][j]>0.0 )
|
| 139 |
+
{
|
| 140 |
+
double op=1.0;
|
| 141 |
+
if( old && old->alProb.count(i->first) )
|
| 142 |
+
op=(old->alProb.find(i->first)->second)[j];
|
| 143 |
+
//cerr << "GIS: " << j << ' ' << " OLD:"
|
| 144 |
+
// << op << "*true:"
|
| 145 |
+
// << i->second[j] << "/pred:" << alProbPredicted[i->first][j] << " -> ";
|
| 146 |
+
i->second[j]= op*(i->second[j]/alProbPredicted[i->first][j]);
|
| 147 |
+
//cerr << i->second[j] << endl;
|
| 148 |
+
}
|
| 149 |
+
else
|
| 150 |
+
{
|
| 151 |
+
cerr << "ERROR2 in performGISiteration: " << i->second[j] << endl;
|
| 152 |
+
}
|
| 153 |
+
}
|
| 154 |
+
}
|
| 155 |
+
else
|
| 156 |
+
cerr << "ERROR in performGISIteration: " << alProbPredicted.count(i->first) << endl;
|
| 157 |
+
}
|
| 158 |
+
}
|
| 159 |
+
};
|
| 160 |
+
|
| 161 |
+
template<class CLS,class MAPPERCLASSTOSTRING>
|
| 162 |
+
inline void printAlDeps(ostream&out,const AlDeps<CLS>&x,const MAPPERCLASSTOSTRING&mapper1,const MAPPERCLASSTOSTRING&mapper2)
|
| 163 |
+
{
|
| 164 |
+
if( (CompareAlDeps&1) ) out << "sentenceLength: " << x.englishSentenceLength<< ' ';
|
| 165 |
+
if( (CompareAlDeps&2) ) out << "previousClass: " << mapper1.classString(x.classPrevious) << ' ';
|
| 166 |
+
if( (CompareAlDeps&4) ) out << "previousPosition: " << x.previous << ' ';
|
| 167 |
+
if( (CompareAlDeps&8) ) out << "FrenchPosition: " << x.j << ' ';
|
| 168 |
+
if( (CompareAlDeps&16) ) out << "FrenchClass: " << mapper2.classString(x.Cj) << ' ';
|
| 169 |
+
//out << '\n';
|
| 170 |
+
}
|
| 171 |
+
|
| 172 |
+
#endif
|
tools/giza-pp/GIZA++-v2/LICENSE
ADDED
|
@@ -0,0 +1,282 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
|
| 3 |
+
Preamble
|
| 4 |
+
|
| 5 |
+
The licenses for most software are designed to take away your freedom
|
| 6 |
+
to share and change it. By contrast, the GNU General Public License is
|
| 7 |
+
intended to guarantee your freedom to share and change free
|
| 8 |
+
software--to make sure the software is free for all its users. This
|
| 9 |
+
General Public License applies to most of the Free Software
|
| 10 |
+
Foundation's software and to any other program whose authors commit to
|
| 11 |
+
using it. (Some other Free Software Foundation software is covered by
|
| 12 |
+
the GNU Library General Public License instead.) You can apply it to
|
| 13 |
+
your programs, too.
|
| 14 |
+
|
| 15 |
+
When we speak of free software, we are referring to freedom, not
|
| 16 |
+
price. Our General Public Licenses are designed to make sure that you
|
| 17 |
+
have the freedom to distribute copies of free software (and charge for
|
| 18 |
+
this service if you wish), that you receive source code or can get it
|
| 19 |
+
if you want it, that you can change the software or use pieces of it
|
| 20 |
+
in new free programs; and that you know you can do these things.
|
| 21 |
+
|
| 22 |
+
To protect your rights, we need to make restrictions that forbid
|
| 23 |
+
anyone to deny you these rights or to ask you to surrender the
|
| 24 |
+
rights. These restrictions translate to certain responsibilities for
|
| 25 |
+
you if you distribute copies of the software, or if you modify it.
|
| 26 |
+
|
| 27 |
+
For example, if you distribute copies of such a program, whether
|
| 28 |
+
gratis or for a fee, you must give the recipients all the rights that
|
| 29 |
+
you have. You must make sure that they, too, receive or can get the
|
| 30 |
+
source code. And you must show them these terms so they know their
|
| 31 |
+
rights.
|
| 32 |
+
|
| 33 |
+
We protect your rights with two steps: (1) copyright the software, and
|
| 34 |
+
(2) offer you this license which gives you legal permission to copy,
|
| 35 |
+
distribute and/or modify the software.
|
| 36 |
+
|
| 37 |
+
Also, for each author's protection and ours, we want to make certain
|
| 38 |
+
that everyone understands that there is no warranty for this free
|
| 39 |
+
software. If the software is modified by someone else and passed on,
|
| 40 |
+
we want its recipients to know that what they have is not the
|
| 41 |
+
original, so that any problems introduced by others will not reflect
|
| 42 |
+
on the original authors' reputations.
|
| 43 |
+
|
| 44 |
+
Finally, any free program is threatened constantly by software
|
| 45 |
+
patents. We wish to avoid the danger that redistributors of a free
|
| 46 |
+
program will individually obtain patent licenses, in effect making the
|
| 47 |
+
program proprietary. To prevent this, we have made it clear that any
|
| 48 |
+
patent must be licensed for everyone's free use or not licensed at
|
| 49 |
+
all.
|
| 50 |
+
|
| 51 |
+
The precise terms and conditions for copying, distribution and
|
| 52 |
+
modification follow.
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
| 56 |
+
|
| 57 |
+
0. This License applies to any program or other work which contains a
|
| 58 |
+
notice placed by the copyright holder saying it may be distributed
|
| 59 |
+
under the terms of this General Public License. The "Program", below,
|
| 60 |
+
refers to any such program or work, and a "work based on the Program"
|
| 61 |
+
means either the Program or any derivative work under copyright law:
|
| 62 |
+
that is to say, a work containing the Program or a portion of it,
|
| 63 |
+
either verbatim or with modifications and/or translated into another
|
| 64 |
+
language. (Hereinafter, translation is included without limitation in
|
| 65 |
+
the term "modification".) Each licensee is addressed as "you".
|
| 66 |
+
|
| 67 |
+
Activities other than copying, distribution and modification are not
|
| 68 |
+
covered by this License; they are outside its scope. The act of
|
| 69 |
+
running the Program is not restricted, and the output from the Program
|
| 70 |
+
is covered only if its contents constitute a work based on the Program
|
| 71 |
+
(independent of having been made by running the Program). Whether that
|
| 72 |
+
is true depends on what the Program does.
|
| 73 |
+
|
| 74 |
+
1. You may copy and distribute verbatim copies of the Program's source
|
| 75 |
+
code as you receive it, in any medium, provided that you conspicuously
|
| 76 |
+
and appropriately publish on each copy an appropriate copyright notice
|
| 77 |
+
and disclaimer of warranty; keep intact all the notices that refer to
|
| 78 |
+
this License and to the absence of any warranty; and give any other
|
| 79 |
+
recipients of the Program a copy of this License along with the
|
| 80 |
+
Program.
|
| 81 |
+
|
| 82 |
+
You may charge a fee for the physical act of transferring a copy, and
|
| 83 |
+
you may at your option offer warranty protection in exchange for a
|
| 84 |
+
fee.
|
| 85 |
+
|
| 86 |
+
2. You may modify your copy or copies of the Program or any portion of
|
| 87 |
+
it, thus forming a work based on the Program, and copy and distribute
|
| 88 |
+
such modifications or work under the terms of Section 1 above,
|
| 89 |
+
provided that you also meet all of these conditions:
|
| 90 |
+
|
| 91 |
+
a) You must cause the modified files to carry prominent notices
|
| 92 |
+
stating that you changed the files and the date of any change.
|
| 93 |
+
|
| 94 |
+
b) You must cause any work that you distribute or publish, that
|
| 95 |
+
in whole or in part contains or is derived from the Program or
|
| 96 |
+
any part thereof, to be licensed as a whole at no charge to all
|
| 97 |
+
third parties under the terms of this License.
|
| 98 |
+
|
| 99 |
+
c) If the modified program normally reads commands interactively
|
| 100 |
+
when run, you must cause it, when started running for such
|
| 101 |
+
interactive use in the most ordinary way, to print or display an
|
| 102 |
+
announcement including an appropriate copyright notice and a
|
| 103 |
+
notice that there is no warranty (or else, saying that you
|
| 104 |
+
provide a warranty) and that users may redistribute the program
|
| 105 |
+
under these conditions, and telling the user how to view a copy
|
| 106 |
+
of this License. (Exception: if the Program itself is interactive
|
| 107 |
+
but does not normally print such an announcement, your work based
|
| 108 |
+
on the Program is not required to print an announcement.)
|
| 109 |
+
|
| 110 |
+
These requirements apply to the modified work as a whole. If
|
| 111 |
+
identifiable sections of that work are not derived from the Program,
|
| 112 |
+
and can be reasonably considered independent and separate works in
|
| 113 |
+
themselves, then this License, and its terms, do not apply to those
|
| 114 |
+
sections when you distribute them as separate works. But when you
|
| 115 |
+
distribute the same sections as part of a whole which is a work based
|
| 116 |
+
on the Program, the distribution of the whole must be on the terms of
|
| 117 |
+
this License, whose permissions for other licensees extend to the
|
| 118 |
+
entire whole, and thus to each and every part regardless of who wrote
|
| 119 |
+
it.
|
| 120 |
+
|
| 121 |
+
Thus, it is not the intent of this section to claim rights or contest
|
| 122 |
+
your rights to work written entirely by you; rather, the intent is to
|
| 123 |
+
exercise the right to control the distribution of derivative or
|
| 124 |
+
collective works based on the Program.
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
In addition, mere aggregation of another work not based on the Program
|
| 128 |
+
with the Program (or with a work based on the Program) on a volume of
|
| 129 |
+
a storage or distribution medium does not bring the other work under
|
| 130 |
+
the scope of this License.
|
| 131 |
+
|
| 132 |
+
3. You may copy and distribute the Program (or a work based on it,
|
| 133 |
+
under Section 2) in object code or executable form under the terms of
|
| 134 |
+
Sections 1 and 2 above provided that you also do one of the following:
|
| 135 |
+
|
| 136 |
+
a) Accompany it with the complete corresponding machine-readable
|
| 137 |
+
source code, which must be distributed under the terms of
|
| 138 |
+
Sections 1 and 2 above on a medium customarily used for software
|
| 139 |
+
interchange; or,
|
| 140 |
+
|
| 141 |
+
b) Accompany it with a written offer, valid for at least three
|
| 142 |
+
years, to give any third party, for a charge no more than your
|
| 143 |
+
cost of physically performing source distribution, a complete
|
| 144 |
+
machine-readable copy of the corresponding source code, to be
|
| 145 |
+
distributed under the terms of Sections 1 and 2 above on a medium
|
| 146 |
+
customarily used for software interchange; or,
|
| 147 |
+
|
| 148 |
+
c) Accompany it with the information you received as to the offer
|
| 149 |
+
to distribute corresponding source code. (This alternative is
|
| 150 |
+
allowed only for noncommercial distribution and only if you
|
| 151 |
+
received the program in object code or executable form with such
|
| 152 |
+
an offer, in accord with Subsection b above.)
|
| 153 |
+
|
| 154 |
+
The source code for a work means the preferred form of the work for
|
| 155 |
+
making modifications to it. For an executable work, complete source
|
| 156 |
+
code means all the source code for all modules it contains, plus any
|
| 157 |
+
associated interface definition files, plus the scripts used to
|
| 158 |
+
control compilation and installation of the executable. However, as a
|
| 159 |
+
special exception, the source code distributed need not include
|
| 160 |
+
anything that is normally distributed (in either source or binary
|
| 161 |
+
form) with the major components (compiler, kernel, and so on) of the
|
| 162 |
+
operating system on which the executable runs, unless that component
|
| 163 |
+
itself accompanies the executable.
|
| 164 |
+
|
| 165 |
+
If distribution of executable or object code is made by offering
|
| 166 |
+
access to copy from a designated place, then offering equivalent
|
| 167 |
+
access to copy the source code from the same place counts as
|
| 168 |
+
distribution of the source code, even though third parties are not
|
| 169 |
+
compelled to copy the source along with the object code.
|
| 170 |
+
|
| 171 |
+
4. You may not copy, modify, sublicense, or distribute the Program
|
| 172 |
+
except as expressly provided under this License. Any attempt otherwise
|
| 173 |
+
to copy, modify, sublicense or distribute the Program is void, and
|
| 174 |
+
will automatically terminate your rights under this License. However,
|
| 175 |
+
parties who have received copies, or rights, from you under this
|
| 176 |
+
License will not have their licenses terminated so long as such
|
| 177 |
+
parties remain in full compliance.
|
| 178 |
+
|
| 179 |
+
5. You are not required to accept this License, since you have not
|
| 180 |
+
signed it. However, nothing else grants you permission to modify or
|
| 181 |
+
distribute the Program or its derivative works. These actions are
|
| 182 |
+
prohibited by law if you do not accept this License. Therefore, by
|
| 183 |
+
modifying or distributing the Program (or any work based on the
|
| 184 |
+
Program), you indicate your acceptance of this License to do so, and
|
| 185 |
+
all its terms and conditions for copying, distributing or modifying
|
| 186 |
+
the Program or works based on it.
|
| 187 |
+
|
| 188 |
+
6. Each time you redistribute the Program (or any work based on the
|
| 189 |
+
Program), the recipient automatically receives a license from the
|
| 190 |
+
original licensor to copy, distribute or modify the Program subject to
|
| 191 |
+
these terms and conditions. You may not impose any further
|
| 192 |
+
restrictions on the recipients' exercise of the rights granted
|
| 193 |
+
herein. You are not responsible for enforcing compliance by third
|
| 194 |
+
parties to this License.
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
7. If, as a consequence of a court judgment or allegation of patent
|
| 198 |
+
infringement or for any other reason (not limited to patent issues),
|
| 199 |
+
conditions are imposed on you (whether by court order, agreement or
|
| 200 |
+
otherwise) that contradict the conditions of this License, they do not
|
| 201 |
+
excuse you from the conditions of this License. If you cannot
|
| 202 |
+
distribute so as to satisfy simultaneously your obligations under this
|
| 203 |
+
License and any other pertinent obligations, then as a consequence you
|
| 204 |
+
may not distribute the Program at all. For example, if a patent
|
| 205 |
+
license would not permit royalty-free redistribution of the Program by
|
| 206 |
+
all those who receive copies directly or indirectly through you, then
|
| 207 |
+
the only way you could satisfy both it and this License would be to
|
| 208 |
+
refrain entirely from distribution of the Program.
|
| 209 |
+
|
| 210 |
+
If any portion of this section is held invalid or unenforceable under
|
| 211 |
+
any particular circumstance, the balance of the section is intended to
|
| 212 |
+
apply and the section as a whole is intended to apply in other
|
| 213 |
+
circumstances.
|
| 214 |
+
|
| 215 |
+
It is not the purpose of this section to induce you to infringe any
|
| 216 |
+
patents or other property right claims or to contest validity of any
|
| 217 |
+
such claims; this section has the sole purpose of protecting the
|
| 218 |
+
integrity of the free software distribution system, which is
|
| 219 |
+
implemented by public license practices. Many people have made
|
| 220 |
+
generous contributions to the wide range of software distributed
|
| 221 |
+
through that system in reliance on consistent application of that
|
| 222 |
+
system; it is up to the author/donor to decide if he or she is willing
|
| 223 |
+
to distribute software through any other system and a licensee cannot
|
| 224 |
+
impose that choice.
|
| 225 |
+
|
| 226 |
+
This section is intended to make thoroughly clear what is believed to
|
| 227 |
+
be a consequence of the rest of this License.
|
| 228 |
+
|
| 229 |
+
8. If the distribution and/or use of the Program is restricted in
|
| 230 |
+
certain countries either by patents or by copyrighted interfaces, the
|
| 231 |
+
original copyright holder who places the Program under this License
|
| 232 |
+
may add an explicit geographical distribution limitation excluding
|
| 233 |
+
those countries, so that distribution is permitted only in or among
|
| 234 |
+
countries not thus excluded. In such case, this License incorporates
|
| 235 |
+
the limitation as if written in the body of this License.
|
| 236 |
+
|
| 237 |
+
9. The Free Software Foundation may publish revised and/or new
|
| 238 |
+
versions of the General Public License from time to time. Such new
|
| 239 |
+
versions will be similar in spirit to the present version, but may
|
| 240 |
+
differ in detail to address new problems or concerns.
|
| 241 |
+
|
| 242 |
+
Each version is given a distinguishing version number. If the Program
|
| 243 |
+
specifies a version number of this License which applies to it and
|
| 244 |
+
"any later version", you have the option of following the terms and
|
| 245 |
+
conditions either of that version or of any later version published by
|
| 246 |
+
the Free Software Foundation. If the Program does not specify a
|
| 247 |
+
version number of this License, you may choose any version ever
|
| 248 |
+
published by the Free Software Foundation.
|
| 249 |
+
|
| 250 |
+
10. If you wish to incorporate parts of the Program into other free
|
| 251 |
+
programs whose distribution conditions are different, write to the
|
| 252 |
+
author to ask for permission. For software which is copyrighted by the
|
| 253 |
+
Free Software Foundation, write to the Free Software Foundation; we
|
| 254 |
+
sometimes make exceptions for this. Our decision will be guided by the
|
| 255 |
+
two goals of preserving the free status of all derivatives of our free
|
| 256 |
+
software and of promoting the sharing and reuse of software generally.
|
| 257 |
+
|
| 258 |
+
NO WARRANTY
|
| 259 |
+
|
| 260 |
+
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO
|
| 261 |
+
WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE
|
| 262 |
+
LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS
|
| 263 |
+
AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF
|
| 264 |
+
ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
|
| 265 |
+
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
| 266 |
+
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
|
| 267 |
+
PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME
|
| 268 |
+
THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
|
| 269 |
+
|
| 270 |
+
|
| 271 |
+
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
|
| 272 |
+
WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
|
| 273 |
+
AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU
|
| 274 |
+
FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
|
| 275 |
+
CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
|
| 276 |
+
PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
|
| 277 |
+
RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
|
| 278 |
+
FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF
|
| 279 |
+
SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
|
| 280 |
+
DAMAGES.
|
| 281 |
+
|
| 282 |
+
END OF TERMS AND CONDITIONS
|
tools/giza-pp/GIZA++-v2/Makefile
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.SUFFIXES: .out .o .c .e .r .f .y .l .s .p .cpp .alpha2o .pentiumo .sgio .alphao
|
| 2 |
+
|
| 3 |
+
INSTALLDIR ?= /usr/local/bin/
|
| 4 |
+
|
| 5 |
+
#CXX = g++
|
| 6 |
+
|
| 7 |
+
CFLAGS = $(CFLAGS_GLOBAL) -Wall -Wno-parentheses
|
| 8 |
+
#CFLAGS_OPT = $(CFLAGS) -O3 -DNDEBUG -DWORDINDEX_WITH_4_BYTE -O3 -DNDEBUG -DWORDINDEX_WITH_4_BYTE -ffast-math
|
| 9 |
+
CFLAGS_OPT = $(CFLAGS) -O3 -funroll-loops -DNDEBUG -DWORDINDEX_WITH_4_BYTE -DBINARY_SEARCH_FOR_TTABLE -DWORDINDEX_WITH_4_BYTE
|
| 10 |
+
CFLAGS_PRF = $(CFLAGS) -O2 -pg -DNDEBUG -DWORDINDEX_WITH_4_BYTE
|
| 11 |
+
CFLAGS_DBG = $(CFLAGS) -g -DDEBUG -DWORDINDEX_WITH_4_BYTE
|
| 12 |
+
CFLAGS_NRM = $(CFLAGS) -DWORDINDEX_WITH_4_BYTE
|
| 13 |
+
CFLAGS_VDBG = $(CFLAGS) -g -DDEBUG -DWORDINDEX_WITH_4_BYTE -DVDEBUG
|
| 14 |
+
SRC = *.cpp
|
| 15 |
+
TYPE =
|
| 16 |
+
LDFLAGS =
|
| 17 |
+
|
| 18 |
+
include Makefile.src
|
| 19 |
+
|
| 20 |
+
OBJ_DIR_PRF = profile/
|
| 21 |
+
OBJ_DIR_OPT = optimized/
|
| 22 |
+
OBJ_DIR_DBG = debug/
|
| 23 |
+
OBJ_DIR_VDBG = vdebug/
|
| 24 |
+
OBJ_DIR_NRM = norm/
|
| 25 |
+
OBJ_OPT2 = ${SRC2:%.cpp=$(OBJ_DIR_OPT)%.o}
|
| 26 |
+
OBJ_OPT = ${SRC:%.cpp=$(OBJ_DIR_OPT)%.o}
|
| 27 |
+
OBJ_DBG = ${SRC:%.cpp=$(OBJ_DIR_DBG)%.o}
|
| 28 |
+
OBJ_VDBG = ${SRC:%.cpp=$(OBJ_DIR_VDBG)%.o}
|
| 29 |
+
OBJ_NRM = ${SRC:%.cpp=$(OBJ_DIR_NRM)%.o}
|
| 30 |
+
OBJ_PRF = ${SRC:%.cpp=$(OBJ_DIR_PRF)%.o}
|
| 31 |
+
OBJ_DIR =
|
| 32 |
+
DATE = `date +%d-%m-%Y`
|
| 33 |
+
|
| 34 |
+
opt: GIZA++ snt2plain.out plain2snt.out snt2cooc.out
|
| 35 |
+
|
| 36 |
+
GIZA++: $(OBJ_DIR_OPT) $(OBJ_OPT)
|
| 37 |
+
$(CXX) $(CFLAGS_OPT) $(OBJ_OPT) $(LDFLAGS) -o GIZA++
|
| 38 |
+
|
| 39 |
+
prf: GIZA++.prf
|
| 40 |
+
|
| 41 |
+
GIZA++.prf: $(OBJ_DIR_PRF) $(OBJ_PRF)
|
| 42 |
+
$(CXX) $(CFLAGS_PRF) $(OBJ_PRF) -o GIZA++.prf $(LDFLAGS)
|
| 43 |
+
|
| 44 |
+
dbg: GIZA++.dbg
|
| 45 |
+
|
| 46 |
+
GIZA++.dbg: $(OBJ_DIR_DBG) $(OBJ_DBG)
|
| 47 |
+
$(CXX) $(CFLAGS_DBG) $(OBJ_DBG) -o GIZA++.dbg $(LDFLAGS)
|
| 48 |
+
|
| 49 |
+
vdbg: GIZA++.vdbg
|
| 50 |
+
|
| 51 |
+
GIZA++.vdbg: $(OBJ_DIR_VDBG) $(OBJ_VDBG)
|
| 52 |
+
$(CXX) $(CFLAGS_VDBG) $(OBJ_VDBG) -o GIZA++.vdbg $(LDFLAGS)
|
| 53 |
+
|
| 54 |
+
nrm: GIZA++.nrm
|
| 55 |
+
|
| 56 |
+
GIZA++.nrm: $(OBJ_DIR_NRM) $(OBJ_NRM)
|
| 57 |
+
$(CXX) $(CFLAGS_NRM) $(OBJ_NRM) -o GIZA++.nrm $(LDFLAGS)
|
| 58 |
+
|
| 59 |
+
all: dbg opt nrm prf
|
| 60 |
+
|
| 61 |
+
$(OBJ_DIR_PRF): $(OBJ_DIR)
|
| 62 |
+
-mkdir $(OBJ_DIR_PRF)
|
| 63 |
+
|
| 64 |
+
$(OBJ_DIR_OPT): $(OBJ_DIR)
|
| 65 |
+
-mkdir $(OBJ_DIR_OPT)
|
| 66 |
+
|
| 67 |
+
$(OBJ_DIR_DBG): $(OBJ_DIR)
|
| 68 |
+
-mkdir $(OBJ_DIR_DBG)
|
| 69 |
+
|
| 70 |
+
$(OBJ_DIR_VDBG): $(OBJ_DIR)
|
| 71 |
+
-mkdir $(OBJ_DIR_VDBG)
|
| 72 |
+
|
| 73 |
+
$(OBJ_DIR_NRM): $(OBJ_DIR)
|
| 74 |
+
-mkdir $(OBJ_DIR_NRM)
|
| 75 |
+
|
| 76 |
+
$(OBJ_DIR):
|
| 77 |
+
-mkdir $(OBJ_DIR)
|
| 78 |
+
|
| 79 |
+
$(OBJ_DIR_DBG)%.o: %.cpp
|
| 80 |
+
$(CXX) $(CFLAGS_DBG) -c $< -o $@
|
| 81 |
+
|
| 82 |
+
$(OBJ_DIR_VDBG)%.o: %.cpp
|
| 83 |
+
$(CXX) $(CFLAGS_VDBG) -c $< -o $@
|
| 84 |
+
|
| 85 |
+
$(OBJ_DIR_NRM)%.o: %.cpp
|
| 86 |
+
$(CXX) $(CFLAGS_NRM) -c $< -o $@
|
| 87 |
+
|
| 88 |
+
$(OBJ_DIR_PRF)%.o: %.cpp
|
| 89 |
+
$(CXX) $(CFLAGS_PRF) -c $< -o $@
|
| 90 |
+
|
| 91 |
+
$(OBJ_DIR_OPT)%.o: %.cpp
|
| 92 |
+
$(CXX) $(CFLAGS_OPT) -c $< -o $@
|
| 93 |
+
|
| 94 |
+
iinstall: opt prf dbg
|
| 95 |
+
-mkdir $(INSTALLDIR)/$(ARCH)
|
| 96 |
+
-cp GIZA++ $(INSTALLDIR)/GIZA++
|
| 97 |
+
-cp GIZA++.prf $(INSTALLDIR)/GIZA++.prf
|
| 98 |
+
-cp GIZA++.dbg $(INSTALLDIR)/GIZA++.dbg
|
| 99 |
+
|
| 100 |
+
install: opt
|
| 101 |
+
-mkdir $(INSTALLDIR)
|
| 102 |
+
-cp GIZA++ $(INSTALLDIR)/GIZA++
|
| 103 |
+
|
| 104 |
+
# Remove all build products: per-configuration object directories, the helper
# tools and every GIZA++ binary variant (optimized, profile, debug, verbose
# debug, normal). 'backup' depends on this target, so leftover binaries would
# otherwise end up in the source tarball.
clean:
	-rm -f $(OBJ_DIR_NRM)/*.o $(OBJ_DIR_DBG)/*.o $(OBJ_DIR_VDBG)/*.o $(OBJ_DIR_PRF)/*.o $(OBJ_DIR_OPT)/*.o
	-rm -rf $(OBJ_DIR_NRM) $(OBJ_DIR_DBG) $(OBJ_DIR_VDBG) $(OBJ_DIR_PRF) $(OBJ_DIR_OPT)
	-rm -f snt2plain.out plain2snt.out snt2cooc.out GIZA++
	-rm -f GIZA++.prf GIZA++.dbg GIZA++.vdbg GIZA++.nrm
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
backup: clean
|
| 111 |
+
tar cf - . | gzip -9 > ../GIZA++src.tar.gz
|
| 112 |
+
|
| 113 |
+
depend: depend_CLEAN dependencies
|
| 114 |
+
|
| 115 |
+
# Drop a stale dependency list before 'dependencies' regenerates it.
# -f keeps 'make depend' working on a fresh checkout where the file does not
# exist yet (plain rm would fail and abort the target).
depend_CLEAN:
	rm -f dependencies
|
| 117 |
+
|
| 118 |
+
dependencies:
|
| 119 |
+
@(echo "#Automatically generated dependecy list" >> dependencies ;\
|
| 120 |
+
$(CXX) -MM *.cpp $(CFLAGS_OPT) | perl -e 'while(<>){s?^([^\:]+\.o:)?$(OBJ_DIR_OPT)\1?g;print;}'>> dependencies)
|
| 121 |
+
@(echo "#Automatically generated dependecy list" >> dependencies ;\
|
| 122 |
+
$(CXX) -MM *.cpp $(CFLAGS_DBG) | perl -e 'while(<>){s?^([^\:]+\.o:)?$(OBJ_DIR_DBG)\1?g;print;}'>> dependencies)
|
| 123 |
+
@(echo "#Automatically generated dependecy list" >> dependencies ;\
|
| 124 |
+
$(CXX) -MM *.cpp $(CFLAGS_VDBG) | perl -e 'while(<>){s?^([^\:]+\.o:)?$(OBJ_DIR_VDBG)\1?g;print;}'>> dependencies)
|
| 125 |
+
@(echo "#Automatically generated dependecy list" >> dependencies ;\
|
| 126 |
+
$(CXX) -MM *.cpp $(CFLAGS_NRM) | perl -e 'while(<>){s?^([^\:]+\.o:)?$(OBJ_DIR_NRM)\1?g;print;}'>> dependencies)
|
| 127 |
+
@(echo "#Automatically generated dependecy list" >> dependencies ;\
|
| 128 |
+
$(CXX) -MM *.cpp $(CFLAGS_PRF) | perl -e 'while(<>){s?^([^\:]+\.o:)?$(OBJ_DIR_PRF)\1?g;print;}'>> dependencies)
|
| 129 |
+
|
| 130 |
+
-include dependencies
|
| 131 |
+
|
| 132 |
+
snt2plain.out: snt2plain.cpp
|
| 133 |
+
$(CXX) $(LDFLAGS) -O3 -W -Wall snt2plain.cpp -o snt2plain.out
|
| 134 |
+
|
| 135 |
+
plain2snt.out: plain2snt.cpp
|
| 136 |
+
$(CXX) $(LDFLAGS) -O3 -W -Wall plain2snt.cpp -o plain2snt.out
|
| 137 |
+
|
| 138 |
+
snt2cooc.out: snt2cooc.cpp
|
| 139 |
+
$(CXX) $(LDFLAGS) -O3 -g -W -Wall snt2cooc.cpp -o snt2cooc.out
|
| 140 |
+
|
tools/giza-pp/GIZA++-v2/Makefile.definitions
ADDED
|
File without changes
|
tools/giza-pp/GIZA++-v2/Makefile.src
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
SRC = Parameter.cpp myassert.cpp Perplexity.cpp model1.cpp model2.cpp model3.cpp getSentence.cpp TTables.cpp ATables.cpp AlignTables.cpp main.cpp NTables.cpp model2to3.cpp collCounts.cpp alignment.cpp vocab.cpp MoveSwapMatrix.cpp transpair_model3.cpp transpair_model5.cpp transpair_model4.cpp utility.cpp parse.cpp reports.cpp model3_viterbi.cpp model3_viterbi_with_tricks.cpp Dictionary.cpp model345-peg.cpp hmm.cpp HMMTables.cpp ForwardBackward.cpp
|
| 2 |
+
|
tools/giza-pp/GIZA++-v2/MoveSwapMatrix.cpp
ADDED
|
@@ -0,0 +1,231 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
| 4 |
+
|
| 5 |
+
This file is part of GIZA++ ( extension of GIZA ).
|
| 6 |
+
|
| 7 |
+
This program is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU General Public License
|
| 9 |
+
as published by the Free Software Foundation; either version 2
|
| 10 |
+
of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This program is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 15 |
+
GNU General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU General Public License
|
| 18 |
+
along with this program; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 20 |
+
USA.
|
| 21 |
+
|
| 22 |
+
*/
|
| 23 |
+
#include "MoveSwapMatrix.h"
|
| 24 |
+
|
| 25 |
+
template<class TRANSPAIR>
|
| 26 |
+
MoveSwapMatrix<TRANSPAIR>::MoveSwapMatrix(const TRANSPAIR&_ef, const alignment&_a)
|
| 27 |
+
: alignment(_a), ef(_ef), l(ef.get_l()), m(ef.get_m()), _cmove(l+1, m+1), _cswap(m+1, m+1),
|
| 28 |
+
delmove(l+1, m+1,0),delswap(m+1, m+1,0),changed(l+2, 0), changedCounter(1),
|
| 29 |
+
modelnr(_ef.modelnr()),lazyEvaluation(0),centerDeleted(0)
|
| 30 |
+
{
|
| 31 |
+
double thisValue=ef.scoreOfAlignmentForChange((*this));
|
| 32 |
+
if( lazyEvaluation==0)
|
| 33 |
+
for(WordIndex j=1;j<=m;j++)updateJ(j, 0,thisValue);
|
| 34 |
+
}
|
| 35 |
+
|
| 36 |
+
template<class TRANSPAIR>
|
| 37 |
+
void MoveSwapMatrix<TRANSPAIR>::updateJ(WordIndex j, bool useChanged,double thisValue)
|
| 38 |
+
{
|
| 39 |
+
massert( lazyEvaluation==0 );
|
| 40 |
+
for(WordIndex i=0;i<=l;i++)
|
| 41 |
+
if( (useChanged==0||changed[i]!=changedCounter) )
|
| 42 |
+
if( get_al(j)!=i )
|
| 43 |
+
_cmove(i, j)=ef.scoreOfMove((*this), i, j,thisValue);
|
| 44 |
+
else
|
| 45 |
+
_cmove(i, j)=1.0;
|
| 46 |
+
for(WordIndex j2=j+1;j2<=m;j2++)
|
| 47 |
+
if( get_al(j)!=get_al(j2) )
|
| 48 |
+
_cswap(j, j2)=ef.scoreOfSwap((*this), j, j2,thisValue);
|
| 49 |
+
else
|
| 50 |
+
_cswap(j, j2)=1.0;
|
| 51 |
+
for(WordIndex j2=1;j2<j;j2++)
|
| 52 |
+
if( get_al(j)!=get_al(j2) )
|
| 53 |
+
_cswap(j2, j)=ef.scoreOfSwap((*this), j2, j,thisValue);
|
| 54 |
+
else
|
| 55 |
+
_cswap(j2, j)=1.0;
|
| 56 |
+
}
|
| 57 |
+
template<class TRANSPAIR>
|
| 58 |
+
void MoveSwapMatrix<TRANSPAIR>::updateI(WordIndex i,double thisValue)
|
| 59 |
+
{
|
| 60 |
+
massert( lazyEvaluation==0);
|
| 61 |
+
for(WordIndex j=1;j<=m;j++)
|
| 62 |
+
if( get_al(j)!=i )
|
| 63 |
+
_cmove(i, j)=ef.scoreOfMove((*this), i, j,thisValue);
|
| 64 |
+
else
|
| 65 |
+
_cmove(i, j)=1.0;
|
| 66 |
+
}
|
| 67 |
+
|
| 68 |
+
template<class TRANSPAIR>
|
| 69 |
+
void MoveSwapMatrix<TRANSPAIR>::printWrongs()const{
|
| 70 |
+
for(WordIndex i=0;i<=l;i++)
|
| 71 |
+
{
|
| 72 |
+
for(WordIndex j=1;j<=m;j++)
|
| 73 |
+
if( get_al(j)==i)
|
| 74 |
+
cout << "A";
|
| 75 |
+
else
|
| 76 |
+
{
|
| 77 |
+
LogProb real=_cmove(i, j), wanted=ef.scoreOfMove((*this), i, j);
|
| 78 |
+
if( fabs(1.0-real/wanted)>1e-3 )
|
| 79 |
+
cout << 'b';
|
| 80 |
+
else if(fabs(1.0-real/wanted)>1e-10 )
|
| 81 |
+
cout << 'e';
|
| 82 |
+
else if(real!=wanted)
|
| 83 |
+
cout << 'E';
|
| 84 |
+
else
|
| 85 |
+
cout << ' ';
|
| 86 |
+
}
|
| 87 |
+
cout << endl;
|
| 88 |
+
}
|
| 89 |
+
cout << endl;
|
| 90 |
+
for(WordIndex j=1;j<=m;j++)
|
| 91 |
+
{
|
| 92 |
+
for(WordIndex j1=1;j1<=m;j1++)
|
| 93 |
+
if( j1>j )
|
| 94 |
+
{
|
| 95 |
+
if( get_al(j)==get_al(j1) )
|
| 96 |
+
cout << 'A';
|
| 97 |
+
else
|
| 98 |
+
cout << (_cswap(j, j1)==ef.scoreOfSwap((*this), j, j1));
|
| 99 |
+
}
|
| 100 |
+
else
|
| 101 |
+
cout << ' ';
|
| 102 |
+
cout << endl;
|
| 103 |
+
}
|
| 104 |
+
massert(0);
|
| 105 |
+
}
|
| 106 |
+
template<class TRANSPAIR>
|
| 107 |
+
bool MoveSwapMatrix<TRANSPAIR>::isRight()const{
|
| 108 |
+
if( lazyEvaluation )
|
| 109 |
+
return 1;
|
| 110 |
+
for(WordIndex i=0;i<=l;i++)
|
| 111 |
+
for(WordIndex j=1;j<=m;j++)
|
| 112 |
+
if( get_al(j)!=i && (!(doubleEqual(_cmove(i, j), ef.scoreOfMove((*this), i, j)))) )
|
| 113 |
+
{
|
| 114 |
+
cerr << "DIFF: " << i << " " << j << " " << _cmove(i, j) << " " << ef.scoreOfMove((*this), i, j) << endl;
|
| 115 |
+
return 0;
|
| 116 |
+
}
|
| 117 |
+
for(WordIndex j=1;j<=m;j++)
|
| 118 |
+
for(WordIndex j1=1;j1<=m;j1++)
|
| 119 |
+
if( j1>j&&get_al(j)!=get_al(j1)&&(!doubleEqual(_cswap(j, j1), ef.scoreOfSwap((*this), j, j1))) )
|
| 120 |
+
{
|
| 121 |
+
cerr << "DIFFERENT: " << j << " " << j1 << " " << _cswap(j, j1) << " " << ef.scoreOfSwap((*this), j, j1) << endl;
|
| 122 |
+
return 0;
|
| 123 |
+
}
|
| 124 |
+
return 1;
|
| 125 |
+
}
|
| 126 |
+
|
| 127 |
+
template<class TRANSPAIR>
|
| 128 |
+
void MoveSwapMatrix<TRANSPAIR>::doMove(WordIndex _i, WordIndex _j)
|
| 129 |
+
{
|
| 130 |
+
WordIndex old_i=get_al(_j);
|
| 131 |
+
if( lazyEvaluation )
|
| 132 |
+
set(_j,_i);
|
| 133 |
+
else
|
| 134 |
+
{
|
| 135 |
+
if ( modelnr==5||modelnr==6 )
|
| 136 |
+
{
|
| 137 |
+
set(_j, _i);
|
| 138 |
+
double thisValue=ef.scoreOfAlignmentForChange((*this));
|
| 139 |
+
for(WordIndex j=1;j<=m;j++)updateJ(j, 0,thisValue);
|
| 140 |
+
}
|
| 141 |
+
else if ( modelnr==4 )
|
| 142 |
+
{
|
| 143 |
+
changedCounter++;
|
| 144 |
+
for(unsigned int k=prev_cept(old_i);k<=next_cept(old_i);++k)changed[k]=changedCounter;
|
| 145 |
+
for(unsigned int k=prev_cept(_i);k<=next_cept(_i);++k)changed[k]=changedCounter;
|
| 146 |
+
set(_j, _i);
|
| 147 |
+
for(unsigned int k=prev_cept(old_i);k<=next_cept(old_i);++k)changed[k]=changedCounter;
|
| 148 |
+
for(unsigned int k=prev_cept(_i);k<=next_cept(_i);++k)changed[k]=changedCounter;
|
| 149 |
+
double thisValue=ef.scoreOfAlignmentForChange((*this));
|
| 150 |
+
for(unsigned int i=0;i<=l;i++)
|
| 151 |
+
if(changed[i]==changedCounter)
|
| 152 |
+
updateI(i,thisValue);
|
| 153 |
+
for(unsigned int j=1;j<=m;j++)
|
| 154 |
+
if( changed[get_al(j)]==changedCounter )
|
| 155 |
+
updateJ(j, 1,thisValue);
|
| 156 |
+
}
|
| 157 |
+
else
|
| 158 |
+
{
|
| 159 |
+
assert(modelnr==3);
|
| 160 |
+
set(_j, _i);
|
| 161 |
+
changedCounter++;
|
| 162 |
+
double thisValue=ef.scoreOfAlignmentForChange((*this));
|
| 163 |
+
updateI(old_i,thisValue);
|
| 164 |
+
changed[old_i]=changedCounter;
|
| 165 |
+
updateI(_i,thisValue);
|
| 166 |
+
changed[_i]=changedCounter;
|
| 167 |
+
for(WordIndex j=1;j<=m;j++)
|
| 168 |
+
if( get_al(j)==_i || get_al(j)==old_i )
|
| 169 |
+
updateJ(j, 1,thisValue);
|
| 170 |
+
}
|
| 171 |
+
}
|
| 172 |
+
}
|
| 173 |
+
template<class TRANSPAIR>
|
| 174 |
+
void MoveSwapMatrix<TRANSPAIR>::doSwap(WordIndex _j1, WordIndex _j2)
|
| 175 |
+
{
|
| 176 |
+
assert( cswap(_j1, _j2)>1 );
|
| 177 |
+
WordIndex i1=get_al(_j1), i2=get_al(_j2);
|
| 178 |
+
if( lazyEvaluation==1 )
|
| 179 |
+
{
|
| 180 |
+
set(_j1, i2);
|
| 181 |
+
set(_j2, i1);
|
| 182 |
+
}
|
| 183 |
+
else
|
| 184 |
+
{
|
| 185 |
+
if ( modelnr==5||modelnr==6 )
|
| 186 |
+
{
|
| 187 |
+
set(_j1, i2);
|
| 188 |
+
set(_j2, i1);
|
| 189 |
+
double thisValue=ef.scoreOfAlignmentForChange((*this));
|
| 190 |
+
for(WordIndex j=1;j<=m;j++)updateJ(j, 0,thisValue);
|
| 191 |
+
}
|
| 192 |
+
else if( modelnr==4 )
|
| 193 |
+
{
|
| 194 |
+
changedCounter++;
|
| 195 |
+
for(unsigned int k=prev_cept(i1);k<=next_cept(i1);++k)changed[k]=changedCounter;
|
| 196 |
+
for(unsigned int k=prev_cept(i2);k<=next_cept(i2);++k)changed[k]=changedCounter;
|
| 197 |
+
set(_j1, i2);
|
| 198 |
+
set(_j2, i1);
|
| 199 |
+
double thisValue=ef.scoreOfAlignmentForChange((*this));
|
| 200 |
+
for(unsigned int i=0;i<=l;i++)
|
| 201 |
+
if(changed[i]==changedCounter)
|
| 202 |
+
updateI(i,thisValue);
|
| 203 |
+
for(unsigned int j=1;j<=m;j++)
|
| 204 |
+
if( changed[get_al(j)]==changedCounter )
|
| 205 |
+
updateJ(j, 1,thisValue);
|
| 206 |
+
}
|
| 207 |
+
else
|
| 208 |
+
{
|
| 209 |
+
assert(modelnr==3);
|
| 210 |
+
set(_j1, i2);
|
| 211 |
+
set(_j2, i1);
|
| 212 |
+
changedCounter++;
|
| 213 |
+
double thisValue=ef.scoreOfAlignmentForChange((*this));
|
| 214 |
+
updateI(i1,thisValue);
|
| 215 |
+
changed[i1]=changedCounter;
|
| 216 |
+
updateI(i2,thisValue);
|
| 217 |
+
changed[i2]=changedCounter;
|
| 218 |
+
updateJ(_j1, 1,thisValue);
|
| 219 |
+
updateJ(_j2, 1,thisValue);
|
| 220 |
+
}
|
| 221 |
+
}
|
| 222 |
+
}
|
| 223 |
+
|
| 224 |
+
#include "transpair_model3.h"
|
| 225 |
+
#include "transpair_model4.h"
|
| 226 |
+
#include "transpair_model5.h"
|
| 227 |
+
#include "transpair_modelhmm.h"
|
| 228 |
+
// The member functions of MoveSwapMatrix are defined in this .cpp file rather
// than in the header, so the template is explicitly instantiated here for each
// transpair type listed.
template class MoveSwapMatrix<transpair_model3>;
|
| 229 |
+
template class MoveSwapMatrix<transpair_model4>;
|
| 230 |
+
template class MoveSwapMatrix<transpair_model5>;
|
| 231 |
+
template class MoveSwapMatrix<transpair_modelhmm>;
|
tools/giza-pp/GIZA++-v2/MoveSwapMatrix.h
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
EGYPT Toolkit for Statistical Machine Translation
|
| 4 |
+
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
| 5 |
+
|
| 6 |
+
This program is free software; you can redistribute it and/or
|
| 7 |
+
modify it under the terms of the GNU General Public License
|
| 8 |
+
as published by the Free Software Foundation; either version 2
|
| 9 |
+
of the License, or (at your option) any later version.
|
| 10 |
+
|
| 11 |
+
This program is distributed in the hope that it will be useful,
|
| 12 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 13 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 14 |
+
GNU General Public License for more details.
|
| 15 |
+
|
| 16 |
+
You should have received a copy of the GNU General Public License
|
| 17 |
+
along with this program; if not, write to the Free Software
|
| 18 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 19 |
+
USA.
|
| 20 |
+
|
| 21 |
+
*/
|
| 22 |
+
/*--
|
| 23 |
+
MoveSwapMatrix: Efficient representation for moving and swapping
|
| 24 |
+
around in IBM3 training.
|
| 25 |
+
Franz Josef Och (30/07/99)
|
| 26 |
+
--*/
|
| 27 |
+
#ifndef moveswap2_costs_h_defined
|
| 28 |
+
#define moveswap2_costs_h_defined
|
| 29 |
+
#include "alignment.h"
|
| 30 |
+
#include "transpair_model3.h"
|
| 31 |
+
#include "myassert.h"
|
| 32 |
+
|
| 33 |
+
extern short DoViterbiTraining;
|
| 34 |
+
|
| 35 |
+
template<class TRANSPAIR>
|
| 36 |
+
class MoveSwapMatrix : public alignment
|
| 37 |
+
{
|
| 38 |
+
private:
|
| 39 |
+
const TRANSPAIR&ef;
|
| 40 |
+
const WordIndex l, m;
|
| 41 |
+
Array2<LogProb, Vector<LogProb> > _cmove, _cswap;
|
| 42 |
+
Array2<char,Vector<char> > delmove,delswap;
|
| 43 |
+
Vector<int> changed;
|
| 44 |
+
int changedCounter;
|
| 45 |
+
const int modelnr;
|
| 46 |
+
bool lazyEvaluation;
|
| 47 |
+
bool centerDeleted;
|
| 48 |
+
public:
|
| 49 |
+
bool check()const
|
| 50 |
+
{
|
| 51 |
+
return 1;
|
| 52 |
+
}
|
| 53 |
+
const TRANSPAIR&get_ef()const
|
| 54 |
+
{return ef;}
|
| 55 |
+
bool isCenterDeleted()const
|
| 56 |
+
{return centerDeleted;}
|
| 57 |
+
bool isLazy()const
|
| 58 |
+
{return lazyEvaluation;}
|
| 59 |
+
MoveSwapMatrix(const TRANSPAIR&_ef, const alignment&_a);
|
| 60 |
+
void updateJ(WordIndex j, bool,double thisValue);
|
| 61 |
+
void updateI(WordIndex i,double thisValue);
|
| 62 |
+
void doMove(WordIndex _i, WordIndex _j);
|
| 63 |
+
void doSwap(WordIndex _j1, WordIndex _j2);
|
| 64 |
+
void delCenter()
|
| 65 |
+
{
|
| 66 |
+
centerDeleted=1;
|
| 67 |
+
}
|
| 68 |
+
void delMove(WordIndex x, WordIndex y)
|
| 69 |
+
{
|
| 70 |
+
delmove(x,y)=1;
|
| 71 |
+
}
|
| 72 |
+
void delSwap(WordIndex x, WordIndex y)
|
| 73 |
+
{
|
| 74 |
+
massert(y>x);
|
| 75 |
+
delswap(x,y)=1;
|
| 76 |
+
delswap(y,x)=1;
|
| 77 |
+
}
|
| 78 |
+
bool isDelMove(WordIndex x, WordIndex y)const
|
| 79 |
+
{
|
| 80 |
+
return DoViterbiTraining||delmove(x,y);
|
| 81 |
+
}
|
| 82 |
+
bool isDelSwap(WordIndex x, WordIndex y)const
|
| 83 |
+
{
|
| 84 |
+
massert(y>x);
|
| 85 |
+
return DoViterbiTraining||delswap(x,y);
|
| 86 |
+
}
|
| 87 |
+
LogProb cmove(WordIndex x, WordIndex y)const
|
| 88 |
+
{
|
| 89 |
+
massert( get_al(y)!=x );
|
| 90 |
+
massert( delmove(x,y)==0 );
|
| 91 |
+
if( lazyEvaluation )
|
| 92 |
+
return ef.scoreOfMove(*this,x,y);
|
| 93 |
+
else
|
| 94 |
+
{
|
| 95 |
+
return _cmove(x, y);
|
| 96 |
+
}
|
| 97 |
+
}
|
| 98 |
+
LogProb cswap(WordIndex x, WordIndex y)const
|
| 99 |
+
{
|
| 100 |
+
massert(x<y);
|
| 101 |
+
massert(delswap(x,y)==0);
|
| 102 |
+
massert(get_al(x)!=get_al(y));
|
| 103 |
+
if( lazyEvaluation )
|
| 104 |
+
return ef.scoreOfSwap(*this,x,y);
|
| 105 |
+
else
|
| 106 |
+
{
|
| 107 |
+
massert(y>x);
|
| 108 |
+
return _cswap(x, y);
|
| 109 |
+
}
|
| 110 |
+
}
|
| 111 |
+
void printWrongs()const;
|
| 112 |
+
bool isRight()const;
|
| 113 |
+
friend ostream&operator<<(ostream&out, const MoveSwapMatrix<TRANSPAIR>&m)
|
| 114 |
+
{return out << (alignment)m << "\nEF:\n"<< m.ef << "\nCMOVE\n"<<m._cmove << "\nCSWAP\n" << m._cswap << endl;};
|
| 115 |
+
};
|
| 116 |
+
#endif
|
tools/giza-pp/GIZA++-v2/NTables.cpp
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
EGYPT Toolkit for Statistical Machine Translation
|
| 4 |
+
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
| 5 |
+
|
| 6 |
+
This program is free software; you can redistribute it and/or
|
| 7 |
+
modify it under the terms of the GNU General Public License
|
| 8 |
+
as published by the Free Software Foundation; either version 2
|
| 9 |
+
of the License, or (at your option) any later version.
|
| 10 |
+
|
| 11 |
+
This program is distributed in the hope that it will be useful,
|
| 12 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 13 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 14 |
+
GNU General Public License for more details.
|
| 15 |
+
|
| 16 |
+
You should have received a copy of the GNU General Public License
|
| 17 |
+
along with this program; if not, write to the Free Software
|
| 18 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 19 |
+
USA.
|
| 20 |
+
|
| 21 |
+
*/
|
| 22 |
+
#include "NTables.h"
|
| 23 |
+
#include <iostream>
|
| 24 |
+
#include "defs.h"
|
| 25 |
+
#include <fstream>
|
| 26 |
+
#include "Parameter.h"
|
| 27 |
+
|
| 28 |
+
GLOBAL_PARAMETER(double,NTablesFactorGraphemes,"nSmooth","smoothing for fertility parameters (good value: 64): weight for wordlength-dependent fertility parameters",PARLEV_SMOOTH,64.0);
|
| 29 |
+
GLOBAL_PARAMETER(double,NTablesFactorGeneral,"nSmoothGeneral","smoothing for fertility parameters (default: 0): weight for word-independent fertility parameters",PARLEV_SMOOTH,0.0);
|
| 30 |
+
|
| 31 |
+
template <class VALTYPE>
|
| 32 |
+
void nmodel<VALTYPE>::printNTable(int noEW, const char* filename,
|
| 33 |
+
const Vector<WordEntry>& evlist,
|
| 34 |
+
bool actual) const
|
| 35 |
+
// prints the fertility table but with actual sourcce words (not their id)
|
| 36 |
+
{
|
| 37 |
+
cerr << "Dumping nTable to: " << filename << '\n';
|
| 38 |
+
ofstream of(filename);
|
| 39 |
+
VALTYPE p ;
|
| 40 |
+
WordIndex k, i ;
|
| 41 |
+
for(i=1; int(i) < noEW; i++){
|
| 42 |
+
if (evlist[i].freq > 0){
|
| 43 |
+
if (actual)
|
| 44 |
+
of << evlist[i].word << ' ' ;
|
| 45 |
+
else
|
| 46 |
+
of << i << ' ' ;
|
| 47 |
+
for( k=0; k < MAX_FERTILITY; k++){
|
| 48 |
+
p = getValue(i, k);
|
| 49 |
+
if (p <= PROB_SMOOTH)
|
| 50 |
+
p = 0;
|
| 51 |
+
of << p << ' ';
|
| 52 |
+
}
|
| 53 |
+
of << '\n';
|
| 54 |
+
}
|
| 55 |
+
}
|
| 56 |
+
}
|
| 57 |
+
|
| 58 |
+
template <class VALTYPE>
|
| 59 |
+
void nmodel<VALTYPE>::readNTable(const char *filename){
|
| 60 |
+
/* This function reads the n table from a file.
|
| 61 |
+
Each line is of the format: source_word_id p0 p1 p2 ... pn
|
| 62 |
+
This is the inverse operation of the printTable function.
|
| 63 |
+
NAS, 7/11/99
|
| 64 |
+
*/
|
| 65 |
+
ifstream inf(filename);
|
| 66 |
+
cerr << "Reading fertility table from " << filename << "\n";
|
| 67 |
+
if(!inf){
|
| 68 |
+
cerr << "\nERROR: Cannot open " << filename <<"\n";
|
| 69 |
+
return;
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
VALTYPE prob;
|
| 73 |
+
WordIndex tok, i;
|
| 74 |
+
int nFert=0;
|
| 75 |
+
while(!inf.eof()){
|
| 76 |
+
nFert++;
|
| 77 |
+
inf >> ws >> tok;
|
| 78 |
+
if (tok > MAX_VOCAB_SIZE){
|
| 79 |
+
cerr << "NTables:readNTable(): unrecognized token id: " << tok
|
| 80 |
+
<<'\n';
|
| 81 |
+
exit(-1);
|
| 82 |
+
}
|
| 83 |
+
for(i = 0; i < MAX_FERTILITY; i++){
|
| 84 |
+
inf >> ws >> prob;
|
| 85 |
+
getRef(tok, i)=prob;
|
| 86 |
+
}
|
| 87 |
+
}
|
| 88 |
+
cerr << "Read " << nFert << " entries in fertility table.\n";
|
| 89 |
+
inf.close();
|
| 90 |
+
}
|
| 91 |
+
|
| 92 |
+
template class nmodel<COUNT>;
|
| 93 |
+
//template class nmodel<PROB>;
|
tools/giza-pp/GIZA++-v2/NTables.h
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
EGYPT Toolkit for Statistical Machine Translation
|
| 4 |
+
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
| 5 |
+
|
| 6 |
+
This program is free software; you can redistribute it and/or
|
| 7 |
+
modify it under the terms of the GNU General Public License
|
| 8 |
+
as published by the Free Software Foundation; either version 2
|
| 9 |
+
of the License, or (at your option) any later version.
|
| 10 |
+
|
| 11 |
+
This program is distributed in the hope that it will be useful,
|
| 12 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 13 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 14 |
+
GNU General Public License for more details.
|
| 15 |
+
|
| 16 |
+
You should have received a copy of the GNU General Public License
|
| 17 |
+
along with this program; if not, write to the Free Software
|
| 18 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 19 |
+
USA.
|
| 20 |
+
|
| 21 |
+
*/
|
| 22 |
+
#ifndef _ntables_h
|
| 23 |
+
#define _ntables_h 1
|
| 24 |
+
#include "Array2.h"
|
| 25 |
+
#include "Vector.h"
|
| 26 |
+
#include <cassert>
|
| 27 |
+
#include "defs.h"
|
| 28 |
+
#include "vocab.h"
|
| 29 |
+
#include "myassert.h"
|
| 30 |
+
#include "Globals.h"
|
| 31 |
+
|
| 32 |
+
extern double NTablesFactorGraphemes,NTablesFactorGeneral;
|
| 33 |
+
|
| 34 |
+
template <class VALTYPE>
|
| 35 |
+
class nmodel
|
| 36 |
+
{
|
| 37 |
+
private:
|
| 38 |
+
Array2<VALTYPE, Vector<VALTYPE> > ntab;
|
| 39 |
+
public:
|
| 40 |
+
nmodel(int maxw, int maxn)
|
| 41 |
+
: ntab(maxw, maxn, 0.0)
|
| 42 |
+
{}
|
| 43 |
+
VALTYPE getValue(int w, unsigned int n)const
|
| 44 |
+
{
|
| 45 |
+
massert(w!=0);
|
| 46 |
+
if(n>=ntab.getLen2())
|
| 47 |
+
return 0.0;
|
| 48 |
+
else
|
| 49 |
+
return max(ntab(w, n), VALTYPE(PROB_SMOOTH));
|
| 50 |
+
}
|
| 51 |
+
VALTYPE&getRef(int w, int n)
|
| 52 |
+
{
|
| 53 |
+
//massert(w!=0);
|
| 54 |
+
return ntab(w, n);
|
| 55 |
+
}
|
| 56 |
+
template<class COUNT>
|
| 57 |
+
void normalize(nmodel<COUNT>&write,const Vector<WordEntry>* _evlist)const
|
| 58 |
+
{
|
| 59 |
+
int h1=ntab.getLen1(), h2=ntab.getLen2();
|
| 60 |
+
int nParams=0;
|
| 61 |
+
if( _evlist&&(NTablesFactorGraphemes||NTablesFactorGeneral) )
|
| 62 |
+
{
|
| 63 |
+
size_t maxlen=0;
|
| 64 |
+
const Vector<WordEntry>&evlist=*_evlist;
|
| 65 |
+
for(unsigned int i=1;i<evlist.size();i++)
|
| 66 |
+
maxlen=max(maxlen,evlist[i].word.length());
|
| 67 |
+
Array2<COUNT,Vector<COUNT> > counts(maxlen+1,MAX_FERTILITY+1,0.0);
|
| 68 |
+
Vector<COUNT> nprob_general(MAX_FERTILITY+1,0.0);
|
| 69 |
+
for(unsigned int i=1;i<min((unsigned int)h1,(unsigned int)evlist.size());i++)
|
| 70 |
+
{
|
| 71 |
+
int l=evlist[i].word.length();
|
| 72 |
+
for(int k=0;k<h2;k++)
|
| 73 |
+
{
|
| 74 |
+
counts(l,k)+=getValue(i,k);
|
| 75 |
+
nprob_general[k]+=getValue(i,k);
|
| 76 |
+
}
|
| 77 |
+
}
|
| 78 |
+
COUNT sum2=0;
|
| 79 |
+
for(unsigned int i=1;i<maxlen+1;i++)
|
| 80 |
+
{
|
| 81 |
+
COUNT sum=0.0;
|
| 82 |
+
for(int k=0;k<h2;k++)
|
| 83 |
+
sum+=counts(i,k);
|
| 84 |
+
sum2+=sum;
|
| 85 |
+
if( sum )
|
| 86 |
+
{
|
| 87 |
+
double average=0.0;
|
| 88 |
+
//cerr << "l: " << i << " " << sum << " ";
|
| 89 |
+
for(int k=0;k<h2;k++)
|
| 90 |
+
{
|
| 91 |
+
counts(i,k)/=sum;
|
| 92 |
+
//cerr << counts(i,k) << ' ';
|
| 93 |
+
average+=k*counts(i,k);
|
| 94 |
+
}
|
| 95 |
+
//cerr << "avg: " << average << endl;
|
| 96 |
+
//cerr << '\n';
|
| 97 |
+
}
|
| 98 |
+
}
|
| 99 |
+
for(unsigned int k=0;k<nprob_general.size();k++)
|
| 100 |
+
nprob_general[k]/=sum2;
|
| 101 |
+
|
| 102 |
+
for(int i=1;i<h1;i++)
|
| 103 |
+
{
|
| 104 |
+
int l=-1;
|
| 105 |
+
if((unsigned int)i<evlist.size())
|
| 106 |
+
l=evlist[i].word.length();
|
| 107 |
+
COUNT sum=0.0;
|
| 108 |
+
for(int k=0;k<h2;k++)
|
| 109 |
+
sum+=getValue(i, k)+((l==-1)?0.0:(counts(l,k)*NTablesFactorGraphemes)) + NTablesFactorGeneral*nprob_general[k];
|
| 110 |
+
assert(sum);
|
| 111 |
+
for(int k=0;k<h2;k++)
|
| 112 |
+
{
|
| 113 |
+
write.getRef(i, k)=(getValue(i, k)+((l==-1)?0.0:(counts(l,k)*NTablesFactorGraphemes)))/sum + NTablesFactorGeneral*nprob_general[k];
|
| 114 |
+
nParams++;
|
| 115 |
+
}
|
| 116 |
+
}
|
| 117 |
+
}
|
| 118 |
+
else
|
| 119 |
+
for(int i=1;i<h1;i++)
|
| 120 |
+
{
|
| 121 |
+
COUNT sum=0.0;
|
| 122 |
+
for(int k=0;k<h2;k++)
|
| 123 |
+
sum+=getValue(i, k);
|
| 124 |
+
assert(sum);
|
| 125 |
+
for(int k=0;k<h2;k++)
|
| 126 |
+
{
|
| 127 |
+
write.getRef(i, k)=getValue(i, k)/sum;
|
| 128 |
+
nParams++;
|
| 129 |
+
}
|
| 130 |
+
}
|
| 131 |
+
cerr << "NTable contains " << nParams << " parameter.\n";
|
| 132 |
+
}
|
| 133 |
+
|
| 134 |
+
void clear()
|
| 135 |
+
{
|
| 136 |
+
int h1=ntab.getLen1(), h2=ntab.getLen2();
|
| 137 |
+
for(int i=0;i<h1;i++)for(int k=0;k<h2;k++)
|
| 138 |
+
ntab(i, k)=0;
|
| 139 |
+
}
|
| 140 |
+
void printNTable(int noEW, const char* filename, const Vector<WordEntry>& evlist, bool) const;
|
| 141 |
+
void readNTable(const char *filename);
|
| 142 |
+
|
| 143 |
+
};
|
| 144 |
+
|
| 145 |
+
#endif
|
tools/giza-pp/GIZA++-v2/Parameter.cpp
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
| 4 |
+
|
| 5 |
+
This file is part of GIZA++ ( extension of GIZA ).
|
| 6 |
+
|
| 7 |
+
This program is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU General Public License
|
| 9 |
+
as published by the Free Software Foundation; either version 2
|
| 10 |
+
of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This program is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 15 |
+
GNU General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU General Public License
|
| 18 |
+
along with this program; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 20 |
+
USA.
|
| 21 |
+
|
| 22 |
+
*/
|
| 23 |
+
#include "Parameter.h"
|
| 24 |
+
#include <fstream>
|
| 25 |
+
#include <unistd.h>
|
| 26 |
+
#include <sstream>
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
bool absolutePathNames=0;
|
| 30 |
+
string ParameterPathPrefix;
|
| 31 |
+
bool ParameterChangedFlag=0;
|
| 32 |
+
|
| 33 |
+
// Writes "<name> <value>" lines to `of` for every parameter in `parset` whose
// level equals `level` (or all levels when level == -1) and that is not marked
// onlyCopy.  Non-empty, relative filename values are prefixed with the current
// working directory (when absolutePathNames is set) and/or with
// ParameterPathPrefix (when that is non-empty).
// Returns 0 if the stream is unusable on entry, 1 otherwise.
bool writeParameters(ofstream&of,const ParSet&parset,int level)
{
  if(!of)return 0;
  for(ParSet::const_iterator i=parset.begin();i!=parset.end();++i)
    {
      if(((*i)->getLevel()==level||level==-1)&&(*i)->onlyCopy==0)
	{
	  // Render the value once so it can be inspected.  No `ends` is
	  // appended: with an ostringstream it would put a NUL character into
	  // the string, making it non-empty even for an empty value.
	  ostringstream os;
	  (*i)->printValue(os);
	  const string s(os.str());
	  const bool relativeFilename=(*i)->isFilename()&&s.length()&&s[0]!='/';
	  of << (*i)->getString() << " ";
	  if( absolutePathNames&&relativeFilename )
	    {
	      char path[1024];
	      // getcwd returns a null pointer on failure and leaves `path` unspecified.
	      if( getcwd(path,sizeof(path)) )
		of << path << '/';
	      else
		cerr << "ERROR: could not determine the current working directory.\n";
	    }
	  if( ParameterPathPrefix.length()&&relativeFilename )
	    of << ParameterPathPrefix << '/';
	  (*i)->printValue(of);
	  of << endl;
	}
    }
  return 1;
}
|
| 59 |
+
|
| 60 |
+
// Reads "<name> <value>" lines from `f` and applies each one through
// makeSetCommand; lines that cannot be applied are reported on cerr.
// Returns 0 if the stream is unusable on entry, 1 otherwise.
bool readParameters(ifstream&f,const ParSet&parset,int verb,int level)
{
  if(!f)
    {
      return 0;
    }
  string line;
  while(getline(f,line))
    {
      // The first two whitespace-separated tokens are the name and the value.
      string name,value;
      istringstream tokens(line);
      tokens>>name;
      tokens>>value;
      const bool applied=makeSetCommand(name,value,parset,verb,level);
      if(!applied)
	{
	  cerr << "ERROR: could not set: (C) " << name << " " << value << endl;
	}
    }
  return 1;
}
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
// Sets the parameter named _s1 (after normalisation by simpleString) to s2.
// An exact name match is applied at once; otherwise a unique parameter whose
// name starts with the given string is used.  A parameter is only modified if
// `level` is -1 or equals the parameter's own level; a level mismatch is
// reported when verb > 1 but still counts as handled.
// Returns 1 if a parameter was identified, 0 if the name is unknown or ambiguous.
bool makeSetCommand(string _s1,string s2,const ParSet&parset,int verb,int level)
{
  string s1=simpleString(_s1);
  ParPtr prefixHit;     // last parameter whose name starts with s1
  int prefixHits=0;     // number of such parameters seen
  for(ParSet::const_iterator it=parset.begin();it!=parset.end();++it)
    {
      if( *(*it)==s1 )
	{
	  if( level==-1 || level==(*it)->getLevel() )
	    {
	      (*it)->setParameter(s2,verb);
	    }
	  else if(verb>1)
	    {
	      cerr << "ERROR: Could not set: (A) " << s1 << " " << s2 << " " << level << " " << (*it)->getLevel() << endl;
	    }
	  return 1;
	}
      if( (*it)->getString().substr(0,s1.length())==s1 )
	{
	  prefixHit=(*it);
	  prefixHits++;
	}
    }
  if( prefixHits==1 )
    {
      if( level==-1 || level==prefixHit->getLevel() )
	{
	  prefixHit->setParameter(s2,verb);
	}
      else if( verb>1 )
	{
	  cerr << "ERROR: Could not set: (B) " << s1 << " " << s2 << " " << level << " " << prefixHit->getLevel() << endl;
	}
      return 1;
    }
  // Here prefixHits is either 0 (unknown name) or greater than 1 (ambiguous).
  if( prefixHits==0 )
    {
      cerr << "ERROR: parameter '" << s1 << "' does not exist.\n";
    }
  else
    {
      cerr << "ERROR: ambiguous parameter '" << s1 << "'.\n";
    }
  return 0;
}
|
| 110 |
+
|
| 111 |
+
// Prints, via printAt, every parameter of `parset` whose level equals `level`
// (or every level when level == -1) and that is not marked onlyCopy, one per
// line.  Returns the stream; nothing is written if it is unusable on entry.
ostream& printPars(ostream&of,const ParSet&parset,int level)
{
  if(!of)
    {
      return of;
    }
  for(ParSet::const_iterator it=parset.begin();it!=parset.end();++it)
    {
      const bool levelMatches=((*it)->getLevel()==level||level==-1);
      if( !(levelMatches&&(*it)->onlyCopy==0) )
	continue;
      (*it)->printAt(of);
      of << endl;
    }
  return of;
}
|
| 124 |
+
|
| 125 |
+
// Normalises a parameter name: every character is lower-cased and only the
// characters 'a'-'z' and '0'-'9' are kept; everything else is dropped.
std::string simpleString(const std::string s)
{
  std::string k;
  k.reserve(s.length());
  for(std::string::size_type i=0;i<s.length();++i)
    {
      // tolower() requires a value representable as unsigned char (or EOF);
      // a plain char may be negative for non-ASCII input, hence the cast.
      const char c=static_cast<char>(tolower(static_cast<unsigned char>(s[i])));
      if( (c>='a'&&c<='z')||(c>='0'&&c<='9') )
	k += c;
    }
  return k;
}
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
// Accessor for the single program-wide parameter set.  The set is a
// function-local static, so it is constructed on the first call.
ParSet&getGlobalParSet()
{
  static ParSet globalParSet;
  return globalParSet;
}
|
tools/giza-pp/GIZA++-v2/Parameter.h
ADDED
|
@@ -0,0 +1,200 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
| 4 |
+
|
| 5 |
+
This file is part of GIZA++ ( extension of GIZA ).
|
| 6 |
+
|
| 7 |
+
This program is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU General Public License
|
| 9 |
+
as published by the Free Software Foundation; either version 2
|
| 10 |
+
of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This program is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 15 |
+
GNU General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU General Public License
|
| 18 |
+
along with this program; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 20 |
+
USA.
|
| 21 |
+
|
| 22 |
+
*/
|
| 23 |
+
#ifndef PARAMETER_H_DEFINED
|
| 24 |
+
#define PARAMETER_H_DEFINED
|
| 25 |
+
|
| 26 |
+
#include "mystl.h"
|
| 27 |
+
#include <set>
|
| 28 |
+
#include "Pointer.h"
|
| 29 |
+
#include <string>
|
| 30 |
+
#include "Globals.h"
|
| 31 |
+
#include <fstream>
|
| 32 |
+
#include <cstring>
|
| 33 |
+
|
| 34 |
+
// Recognizes the boolean keywords accepted by the numeric mConvert overloads.
// Case-insensitive "yes"/"y"/"true"/"t" store true in `value`; "no"/"n"/"false"/"f"
// store false. A recognized keyword is echoed to stderr as "TRUE" or "FALSE".
// Returns true if `s` was one of the keywords, false otherwise (`value` untouched).
inline bool mConvertBooleanWord(const std::string&s,bool&value)
{
  const char*const text=s.c_str();
  if( strcasecmp(text,"yes")==0 || strcasecmp(text,"y")==0 || strcasecmp(text,"true")==0 || strcasecmp(text,"t")==0 )
    {
      std::cerr << "TRUE\n";
      value=true;
      return true;
    }
  if( strcasecmp(text,"no")==0 || strcasecmp(text,"n")==0 || strcasecmp(text,"false")==0 || strcasecmp(text,"f")==0 )
    {
      std::cerr << "FALSE\n";
      value=false;
      return true;
    }
  return false;
}

// Shared implementation of the integral/bool overloads: a boolean keyword
// yields 1 or 0, anything else is parsed with atoi(). The result is stored in
// `out` (converted to Number) and also returned.
template<class Number>
inline Number mConvertWordOrInteger(const std::string&s,Number&out)
{
  bool flag=false;
  if( mConvertBooleanWord(s,flag) )
    return out=flag;
  return out=atoi(s.c_str());
}

// mConvert(s, target): parses the text `s` into `target` and returns the stored
// value. One overload exists per supported parameter type.
inline unsigned int mConvert(const std::string&s,unsigned int &i)
{ return mConvertWordOrInteger(s,i); }
inline int mConvert(const std::string&s,int &i)
{ return mConvertWordOrInteger(s,i); }
// Floating-point values are parsed with atof(); boolean keywords are not recognized here.
inline double mConvert(const std::string&s,double &d) { return d=atof(s.c_str()); }
inline double mConvert(const std::string&s,float &d) { return d=atof(s.c_str()); }
// Strings are copied verbatim.
inline std::string mConvert(const std::string&s,std::string&n) { return n=s; }
inline bool mConvert(const std::string&s,bool&n)
{ return mConvertWordOrInteger(s,n); }
inline short mConvert(const std::string&s,short&n)
{ return mConvertWordOrInteger(s,n); }
inline unsigned short mConvert(const std::string&s,unsigned short&n)
{ return mConvertWordOrInteger(s,n); }
|
| 63 |
+
|
| 64 |
+
string simpleString(const string s);
|
| 65 |
+
|
| 66 |
+
// Polynomial string hash: starting from 0, each character updates the running
// value as 5*value + character.
// The accumulation is done in unsigned arithmetic so that long strings wrap
// around modulo 2^N instead of overflowing a signed int (undefined behaviour);
// the wrapped bit pattern is then returned as an int.
//   s - string to hash.
// Returns 0 for the empty string.
inline int Hashstring(const std::string& s)
{
  unsigned int sum=0;
  for(std::string::const_iterator i=s.begin(),end=s.end();i!=end;++i)
    sum=5u*sum+static_cast<unsigned int>(*i);
  return static_cast<int>(sum);
}
|
| 73 |
+
|
| 74 |
+
class _Parameter
|
| 75 |
+
{
|
| 76 |
+
protected:
|
| 77 |
+
string name;
|
| 78 |
+
bool *ifChanged;
|
| 79 |
+
string description;
|
| 80 |
+
int level;
|
| 81 |
+
bool filename;
|
| 82 |
+
public:
|
| 83 |
+
int onlyCopy;
|
| 84 |
+
_Parameter(string n,bool&b,string desc,int _level,bool _onlyCopy)
|
| 85 |
+
: name(simpleString(n)),ifChanged(&b),description(desc),level(_level),filename(0),onlyCopy(_onlyCopy) {}
|
| 86 |
+
virtual ~_Parameter(){};
|
| 87 |
+
bool operator==(const string&s)const
|
| 88 |
+
{ return name== simpleString(s); }
|
| 89 |
+
void setChanged()
|
| 90 |
+
{ *ifChanged=true; }
|
| 91 |
+
virtual bool setParameter(string s2,int)=0;
|
| 92 |
+
virtual ostream&printAt(ostream&out)=0;
|
| 93 |
+
virtual ostream&printValue(ostream&out)=0;
|
| 94 |
+
const string&getString() const { return name; }
|
| 95 |
+
int getLevel() const { return level;}
|
| 96 |
+
bool isFilename() { return filename;}
|
| 97 |
+
void setFilename(bool x=1) { filename=x;}
|
| 98 |
+
friend bool operator==(const _Parameter&a,const _Parameter&b)
|
| 99 |
+
{ return a.name==b.name; }
|
| 100 |
+
friend bool operator<(const _Parameter&a,const _Parameter&b)
|
| 101 |
+
{ return a.name<b.name; }
|
| 102 |
+
friend int Hash(const _Parameter&aaa)
|
| 103 |
+
{ return Hashstring(aaa.name); }
|
| 104 |
+
friend ostream&operator<<(ostream&out,const _Parameter&p)
|
| 105 |
+
{ return out<<"Parameter: "<<p.name <<endl;}
|
| 106 |
+
};
|
| 107 |
+
|
| 108 |
+
template<class T>
|
| 109 |
+
class Parameter : public _Parameter
|
| 110 |
+
{
|
| 111 |
+
private:
|
| 112 |
+
T*t;
|
| 113 |
+
public:
|
| 114 |
+
Parameter(string n,bool&b,string desc,T&_t,int level=0,bool onlyCopy=0)
|
| 115 |
+
: _Parameter(n,b,desc,level,onlyCopy),t(&_t) {}
|
| 116 |
+
virtual ~Parameter(){}
|
| 117 |
+
virtual bool setParameter(string s2,int verb)
|
| 118 |
+
{
|
| 119 |
+
T x;
|
| 120 |
+
if( !(*t==mConvert(s2,x)))
|
| 121 |
+
{
|
| 122 |
+
bool printedFirst=0;
|
| 123 |
+
if( verb>1 )
|
| 124 |
+
{
|
| 125 |
+
cout << "Parameter '"<<name <<"' changed from '"<<*t<<"' to '";
|
| 126 |
+
printedFirst=1;
|
| 127 |
+
}
|
| 128 |
+
mConvert(s2,*t);
|
| 129 |
+
if( printedFirst )
|
| 130 |
+
cout << *t <<"'\n";
|
| 131 |
+
setChanged();
|
| 132 |
+
return 1;
|
| 133 |
+
}
|
| 134 |
+
return 0;
|
| 135 |
+
}
|
| 136 |
+
virtual ostream&printAt(ostream&out)
|
| 137 |
+
{return out << name << " = " << *t << " (" << description << ")";}
|
| 138 |
+
virtual ostream&printValue(ostream&out)
|
| 139 |
+
{return out << *t;}
|
| 140 |
+
};
|
| 141 |
+
|
| 142 |
+
// Handle type stored in ParSet: an MP wrapper around a _Parameter pointer.
typedef MP<_Parameter> ParPtr;
|
| 143 |
+
|
| 144 |
+
// Set of parameter handles.
// insert() hides the base-class insert in order to report duplicates on cerr;
// the element is still handed to set<ParPtr>::insert afterwards.
class ParSet : public set<ParPtr>
{
 public:
  void insert(const ParPtr&x)
    {
      const bool alreadyPresent = (count(x)!=0);
      if( alreadyPresent )
        cerr << "ERROR: element " << x->getString() << " already inserted.\n";
      set<ParPtr>::insert(x);
    }
};
|
| 154 |
+
|
| 155 |
+
bool makeSetCommand(string s1,string s2,const ParSet&pars,int verb=1,int level= -1);
|
| 156 |
+
ostream&printPars(ostream&out,const ParSet&pars,int level=-1);
|
| 157 |
+
bool writeParameters(ofstream&of,const ParSet&parset,int level=0);
|
| 158 |
+
bool readParameters(ifstream&f,const ParSet&parset,int verb=2,int level=0);
|
| 159 |
+
ParSet&getGlobalParSet();
|
| 160 |
+
extern bool ParameterChangedFlag;
|
| 161 |
+
template<class T>const T&addGlobalParameter(const char *name,const char *description,int level,T*adr,const T&init)
|
| 162 |
+
{
|
| 163 |
+
*adr=init;
|
| 164 |
+
getGlobalParSet().insert(new Parameter<T>(name,ParameterChangedFlag,description,*adr,level));
|
| 165 |
+
return init;
|
| 166 |
+
}
|
| 167 |
+
template<class T>const T&addGlobalParameter(const char *name,const char *name2,const char *description,int level,T*adr,const T&init)
|
| 168 |
+
{
|
| 169 |
+
*adr=init;
|
| 170 |
+
getGlobalParSet().insert(new Parameter<T>(name,ParameterChangedFlag,description,*adr,level));
|
| 171 |
+
getGlobalParSet().insert(new Parameter<T>(name2,ParameterChangedFlag,description,*adr,-1));
|
| 172 |
+
return init;
|
| 173 |
+
}
|
| 174 |
+
template<class T>const T&addGlobalParameter(const char *name,const char *name2,const char *name3,const char *description,int level,T*adr,const T&init)
|
| 175 |
+
{
|
| 176 |
+
*adr=init;
|
| 177 |
+
getGlobalParSet().insert(new Parameter<T>(name,ParameterChangedFlag,description,*adr,level));
|
| 178 |
+
getGlobalParSet().insert(new Parameter<T>(name2,ParameterChangedFlag,description,*adr,-1));
|
| 179 |
+
getGlobalParSet().insert(new Parameter<T>(name3,ParameterChangedFlag,description,*adr,-1));
|
| 180 |
+
return init;
|
| 181 |
+
}
|
| 182 |
+
template<class T>const T&addGlobalParameter(const char *name,const char *name2,const char *name3,const char *name4,const char *description,int level,T*adr,const T&init)
|
| 183 |
+
{
|
| 184 |
+
*adr=init;
|
| 185 |
+
getGlobalParSet().insert(new Parameter<T>(name,ParameterChangedFlag,description,*adr,level));
|
| 186 |
+
getGlobalParSet().insert(new Parameter<T>(name2,ParameterChangedFlag,description,*adr,-1));
|
| 187 |
+
getGlobalParSet().insert(new Parameter<T>(name3,ParameterChangedFlag,description,*adr,-1));
|
| 188 |
+
getGlobalParSet().insert(new Parameter<T>(name4,ParameterChangedFlag,description,*adr,-1));
|
| 189 |
+
return init;
|
| 190 |
+
}
|
| 191 |
+
void MakeParameterOptimizing(istream&file,string resultingParameters);
|
| 192 |
+
|
| 193 |
+
// Defines a variable VARNAME of type TYP whose initializer calls addGlobalParameter
// with &VARNAME, NAME, DESCRIPTION, LEVEL and INIT. The expansion already ends in ';'.
#define GLOBAL_PARAMETER(TYP,VARNAME,NAME,DESCRIPTION,LEVEL,INIT) TYP VARNAME=addGlobalParameter< TYP >(NAME,DESCRIPTION,LEVEL,&VARNAME,INIT);
|
| 194 |
+
// Same pattern as a single-name definition, but forwards two names (NAME, NAME2)
// to the matching addGlobalParameter overload. The expansion already ends in ';'.
#define GLOBAL_PARAMETER2(TYP,VARNAME,NAME,NAME2,DESCRIPTION,LEVEL,INIT) TYP VARNAME=addGlobalParameter< TYP >(NAME,NAME2,DESCRIPTION,LEVEL,&VARNAME,INIT);
|
| 195 |
+
// Defines a variable VARNAME of type TYP and forwards three names (NAME, NAME2, NAME3)
// to the matching addGlobalParameter overload. The expansion already ends in ';'.
#define GLOBAL_PARAMETER3(TYP,VARNAME,NAME,NAME2,NAME3,DESCRIPTION,LEVEL,INIT) TYP VARNAME=addGlobalParameter< TYP >(NAME,NAME2,NAME3,DESCRIPTION,LEVEL,&VARNAME,INIT);
|
| 196 |
+
// Defines a variable VARNAME of type TYP and forwards four names (NAME .. NAME4)
// to the matching addGlobalParameter overload. The expansion already ends in ';'.
#define GLOBAL_PARAMETER4(TYP,VARNAME,NAME,NAME2,NAME3,NAME4,DESCRIPTION,LEVEL,INIT) TYP VARNAME=addGlobalParameter< TYP >(NAME,NAME2,NAME3,NAME4,DESCRIPTION,LEVEL,&VARNAME,INIT);
|
| 197 |
+
|
| 198 |
+
void setParameterLevelName(unsigned int i,string x);
|
| 199 |
+
|
| 200 |
+
#endif
|
tools/giza-pp/GIZA++-v2/Perplexity.cpp
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
EGYPT Toolkit for Statistical Machine Translation
|
| 4 |
+
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
| 5 |
+
|
| 6 |
+
This program is free software; you can redistribute it and/or
|
| 7 |
+
modify it under the terms of the GNU General Public License
|
| 8 |
+
as published by the Free Software Foundation; either version 2
|
| 9 |
+
of the License, or (at your option) any later version.
|
| 10 |
+
|
| 11 |
+
This program is distributed in the hope that it will be useful,
|
| 12 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 13 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 14 |
+
GNU General Public License for more details.
|
| 15 |
+
|
| 16 |
+
You should have received a copy of the GNU General Public License
|
| 17 |
+
along with this program; if not, write to the Free Software
|
| 18 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 19 |
+
USA.
|
| 20 |
+
|
| 21 |
+
*/
|
| 22 |
+
/* Perplexity.cc
|
| 23 |
+
* =============
|
| 24 |
+
* Mike Jahr, 7/21/99
|
| 25 |
+
* Machine Translation group, WS99
|
| 26 |
+
* Center for Language and Speech Processing
|
| 27 |
+
*
|
| 28 |
+
* Last Modified by: Yaser Al-Onaizan, August 17, 1999
|
| 29 |
+
*
|
| 30 |
+
* Simple class used to calculate cross entropy and perplexity
|
| 31 |
+
* of models.
|
| 32 |
+
*/
|
| 33 |
+
|
| 34 |
+
#include "Perplexity.h"
|
| 35 |
+
|
| 36 |
+
// Appends a snapshot of the current statistics to the history vectors:
// the model label, the current perplexity() and the current cross_entropy().
void Perplexity::record(string model){
  const double currentPerplexity = perplexity();
  const double currentCrossEntropy = cross_entropy();
  modelid.push_back(model);
  perp.push_back(currentPerplexity);
  ce.push_back(currentCrossEntropy);
}
|
tools/giza-pp/GIZA++-v2/Perplexity.h
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
EGYPT Toolkit for Statistical Machine Translation
|
| 4 |
+
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
| 5 |
+
|
| 6 |
+
This program is free software; you can redistribute it and/or
|
| 7 |
+
modify it under the terms of the GNU General Public License
|
| 8 |
+
as published by the Free Software Foundation; either version 2
|
| 9 |
+
of the License, or (at your option) any later version.
|
| 10 |
+
|
| 11 |
+
This program is distributed in the hope that it will be useful,
|
| 12 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 13 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 14 |
+
GNU General Public License for more details.
|
| 15 |
+
|
| 16 |
+
You should have received a copy of the GNU General Public License
|
| 17 |
+
along with this program; if not, write to the Free Software
|
| 18 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 19 |
+
USA.
|
| 20 |
+
|
| 21 |
+
*/
|
| 22 |
+
/* Perplexity.h
|
| 23 |
+
* ============
|
| 24 |
+
* Mike Jahr, 7/15/99
|
| 25 |
+
* Machine Translation group, WS99
|
| 26 |
+
* Center for Language and Speech Processing
|
| 27 |
+
*
|
| 28 |
+
* Last Modified by: Yaser Al-Onaizan, August 17, 1999
|
| 29 |
+
*
|
| 30 |
+
* Simple class used to calculate cross entropy and perplexity
|
| 31 |
+
* of models.
|
| 32 |
+
*/
|
| 33 |
+
|
| 34 |
+
#ifndef _PERPLEXITY_H
|
| 35 |
+
#define _PERPLEXITY_H
|
| 36 |
+
|
| 37 |
+
#include <cmath>
|
| 38 |
+
#include <fstream>
|
| 39 |
+
#include "Vector.h"
|
| 40 |
+
#include "defs.h"
|
| 41 |
+
#include "Array2.h"
|
| 42 |
+
#include "Globals.h"
|
| 43 |
+
|
| 44 |
+
#define CROSS_ENTROPY_BASE 2
|
| 45 |
+
|
| 46 |
+
class Perplexity {
|
| 47 |
+
private:
|
| 48 |
+
double sum;
|
| 49 |
+
double wc;
|
| 50 |
+
Array2<double, Vector<double> > *E_M_L;
|
| 51 |
+
Vector<string> modelid;
|
| 52 |
+
Vector<double > perp;
|
| 53 |
+
Vector<double > ce;
|
| 54 |
+
Vector<string> name ;
|
| 55 |
+
public:
|
| 56 |
+
~Perplexity() { delete E_M_L;}
|
| 57 |
+
Perplexity() {
|
| 58 |
+
E_M_L = new Array2<double, Vector<double> >(MAX_SENTENCE_LENGTH,MAX_SENTENCE_LENGTH);
|
| 59 |
+
unsigned int l, m ;
|
| 60 |
+
Vector<double> fact(MAX_SENTENCE_LENGTH, 1.0);
|
| 61 |
+
for (m = 2 ; m < MAX_SENTENCE_LENGTH ; m++)
|
| 62 |
+
fact[m] = fact[m-1] * m ;
|
| 63 |
+
for (m = 1 ; m < MAX_SENTENCE_LENGTH ; m++)
|
| 64 |
+
for (l = 1 ; l < MAX_SENTENCE_LENGTH ; l++) {
|
| 65 |
+
(*E_M_L)(l, m) = log (pow((LAMBDA * l), double(m)) * exp(-LAMBDA * double(l)) /
|
| 66 |
+
(fact[m])) ;
|
| 67 |
+
}
|
| 68 |
+
sum = 0 ;
|
| 69 |
+
wc = 0;
|
| 70 |
+
perp.clear();
|
| 71 |
+
ce.clear();
|
| 72 |
+
name.clear();
|
| 73 |
+
}
|
| 74 |
+
inline void clear() {
|
| 75 |
+
sum = 0 ;
|
| 76 |
+
wc = 0 ;
|
| 77 |
+
}
|
| 78 |
+
size_t size() const {return(min(perp.size(), ce.size()));}
|
| 79 |
+
inline void addFactor(const double p, const double count, const int l,
|
| 80 |
+
const int m,bool withPoisson) {
|
| 81 |
+
wc += count * m ; // number of french words
|
| 82 |
+
sum += count * ( (withPoisson?((*E_M_L)(l, m)):0.0) + p) ;
|
| 83 |
+
}
|
| 84 |
+
inline double perplexity() const {
|
| 85 |
+
return exp( -1*sum / wc);
|
| 86 |
+
}
|
| 87 |
+
|
| 88 |
+
inline double cross_entropy() const {
|
| 89 |
+
return (-1.0*sum / (log(double(CROSS_ENTROPY_BASE)) * wc));
|
| 90 |
+
}
|
| 91 |
+
|
| 92 |
+
inline double word_count() const {
|
| 93 |
+
return wc;
|
| 94 |
+
}
|
| 95 |
+
|
| 96 |
+
inline double getSum() const {
|
| 97 |
+
return sum ;
|
| 98 |
+
}
|
| 99 |
+
|
| 100 |
+
void record(string model);
|
| 101 |
+
|
| 102 |
+
friend void generatePerplexityReport(const Perplexity&, const Perplexity&,
|
| 103 |
+
const Perplexity&, const Perplexity&,
|
| 104 |
+
ostream&, int, int, bool);
|
| 105 |
+
};
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
#endif
|
tools/giza-pp/GIZA++-v2/Pointer.h
ADDED
|
@@ -0,0 +1,175 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
| 4 |
+
|
| 5 |
+
This file is part of GIZA++ ( extension of GIZA ).
|
| 6 |
+
|
| 7 |
+
This program is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU General Public License
|
| 9 |
+
as published by the Free Software Foundation; either version 2
|
| 10 |
+
of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This program is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 15 |
+
GNU General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU General Public License
|
| 18 |
+
along with this program; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 20 |
+
USA.
|
| 21 |
+
|
| 22 |
+
*/
|
| 23 |
+
#ifndef HEADER_Pointer_DEFINED
|
| 24 |
+
#define HEADER_Pointer_DEFINED
|
| 25 |
+
|
| 26 |
+
#include <cassert>
|
| 27 |
+
#include <ostream>
|
| 28 |
+
|
| 29 |
+
// Thin non-owning wrapper around a raw T*: it stores the pointer and forwards
// dereference, member access and null tests to it. Nothing is ever deleted here.
template<class T>
class SmartPointer
{
 protected:
  T*p;
 public:
  SmartPointer(T*_p=0)
    : p(_p) {}
  inline T&operator*() const
    {return *p;}
  inline T*operator->() const
    {return p;}
  // True when a non-null pointer is held.
  inline operator bool() const
    {return p!=0;}
  inline T*ptr() const
    { return p; }
};
// Streams the pointee, or the text "nullpointer" when no pointer is held.
// std::ostream is spelled out so this header does not depend on a
// using-directive supplied by whoever includes it.
template<class T> inline std::ostream &operator<<(std::ostream&out,const SmartPointer<T>&s)
{
  if( s.ptr() )
    return out << *s;
  return out <<"nullpointer";
}
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
// Thin non-owning wrapper around a raw const T*: read-only counterpart of
// SmartPointer. Nothing is ever deleted here.
template<class T>
class SmartPointerConst
{
 protected:
  const T*p;
 public:
  SmartPointerConst(const T*_p=0)
    : p(_p) {}
  inline const T&operator*() const
    {return *p;}
  inline const T*operator->() const
    {return p;}
  // True when a non-null pointer is held.
  inline operator bool() const
    {return p!=0;}
  inline const T*ptr() const
    { return p; }
};
// Streams the pointee, or the text "nullpointer" when no pointer is held.
// std::ostream is spelled out so this header does not depend on a
// using-directive supplied by whoever includes it.
template<class T> inline std::ostream &operator<<(std::ostream&out,const SmartPointerConst<T>&s)
{
  if( s.ptr() )
    return out << *s;
  return out <<"nullpointer";
}
|
| 69 |
+
|
| 70 |
+
template <class T>
|
| 71 |
+
class UP : public SmartPointer<T>
|
| 72 |
+
{
|
| 73 |
+
public:
|
| 74 |
+
UP(T*_p=0)
|
| 75 |
+
: SmartPointer<T>(_p) {}
|
| 76 |
+
};
|
| 77 |
+
template<class T> inline bool operator==(const UP<T>&s1,const UP<T>&s2)
|
| 78 |
+
{return s1.ptr()==s2.ptr();}
|
| 79 |
+
template<class T> inline bool operator<(const UP<T>&s1,const UP<T>&s2)
|
| 80 |
+
{return s1.ptr() < s2.ptr();}
|
| 81 |
+
template<class T> inline int Hash(const UP<T> &wp)
|
| 82 |
+
{if(wp.ptr())return Hash(*wp);else return 0;}
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
template <class T>
|
| 86 |
+
class UPConst : public SmartPointerConst<T>
|
| 87 |
+
{
|
| 88 |
+
public:
|
| 89 |
+
UPConst(const T*_p=0)
|
| 90 |
+
: SmartPointerConst<T>(_p) {}
|
| 91 |
+
};
|
| 92 |
+
template<class T> inline bool operator==(const UPConst<T>&s1,const UPConst<T>&s2)
|
| 93 |
+
{return s1.ptr()==s2.ptr();}
|
| 94 |
+
template<class T> inline bool operator<(const UPConst<T>&s1,const UPConst<T>&s2)
|
| 95 |
+
{return s1.ptr()<s2.ptr();}
|
| 96 |
+
template<class T> inline int Hash(const UPConst<T> &wp)
|
| 97 |
+
{if(wp.ptr())return Hash(*wp);else return 0;}
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
template <class T>
|
| 101 |
+
class MP : public SmartPointer<T>
|
| 102 |
+
{
|
| 103 |
+
public:
|
| 104 |
+
MP(T*_p=0)
|
| 105 |
+
: SmartPointer<T>(_p) {}
|
| 106 |
+
};
|
| 107 |
+
template <class T> inline bool operator==(const MP<T>&s1,const MP<T>&s2)
|
| 108 |
+
{assert(s1);assert(s2);return *s1==*s2;}
|
| 109 |
+
template <class T> inline bool operator<(const MP<T>&s1,const MP<T>&s2)
|
| 110 |
+
{assert(s1);assert(s2);return *s1 < *s2;}
|
| 111 |
+
template <class T> inline int Hash(const MP<T> &wp)
|
| 112 |
+
{if(wp.ptr())return Hash(*wp);else return 0;}
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
template <class T>
|
| 116 |
+
class MPConst : public SmartPointerConst<T>
|
| 117 |
+
{
|
| 118 |
+
public:
|
| 119 |
+
MPConst(const T*_p=0)
|
| 120 |
+
: SmartPointerConst<T>(_p) {}
|
| 121 |
+
};
|
| 122 |
+
template <class T> inline bool operator==(const MPConst<T>&s1,const MPConst<T>&s2)
|
| 123 |
+
{assert(s1);assert(s2);return *s1== *s2;}
|
| 124 |
+
template <class T> inline bool operator<(const MPConst<T>&s1,const MPConst<T>&s2)
|
| 125 |
+
{assert(s1);assert(s2);return *s1 < *s2;}
|
| 126 |
+
template <class T> inline int Hash(const MPConst<T> &wp)
|
| 127 |
+
{if(wp.ptr())return Hash(*wp);else return 0;}
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
template <class T>
|
| 131 |
+
class DELP : public SmartPointer<T>
|
| 132 |
+
{
|
| 133 |
+
private:
|
| 134 |
+
DELP(const DELP<T>&x);
|
| 135 |
+
public:
|
| 136 |
+
const DELP<T>&operator=(DELP<T>&x)
|
| 137 |
+
{
|
| 138 |
+
delete this->p;
|
| 139 |
+
this->p=x.p;x.p=0;
|
| 140 |
+
return *this;
|
| 141 |
+
}
|
| 142 |
+
|
| 143 |
+
~DELP()
|
| 144 |
+
{ delete this->p;this->p=0;}
|
| 145 |
+
DELP(T*_p=0)
|
| 146 |
+
: SmartPointer<T>(_p) {}
|
| 147 |
+
void set(T*_p)
|
| 148 |
+
{
|
| 149 |
+
delete this->p;
|
| 150 |
+
this->p=_p;
|
| 151 |
+
}
|
| 152 |
+
friend bool operator==(const DELP<T>&s1,const DELP<T>&s2)
|
| 153 |
+
{
|
| 154 |
+
return *(s1.p)== *(s2.p);
|
| 155 |
+
}
|
| 156 |
+
friend bool operator<(const DELP<T>&s1,const DELP<T>&s2)
|
| 157 |
+
{
|
| 158 |
+
return *(s1.p) < *(s2.p);
|
| 159 |
+
}
|
| 160 |
+
friend inline int Hash(const DELP<T> &wp)
|
| 161 |
+
{
|
| 162 |
+
if(wp.p)
|
| 163 |
+
return Hash(*wp.p);
|
| 164 |
+
else
|
| 165 |
+
return 0;
|
| 166 |
+
}
|
| 167 |
+
};
|
| 168 |
+
#endif
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
|
| 174 |
+
|
| 175 |
+
|
tools/giza-pp/GIZA++-v2/README
ADDED
|
@@ -0,0 +1,508 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
========================================================================
|
| 2 |
+
GIZA++ is an extension of the program GIZA.
|
| 3 |
+
It is a program for learning statistical translation models from
|
| 4 |
+
bitext. It is an implementation of the models described in
|
| 5 |
+
(Brown et al., 1993), (Vogel et al., 1996), (Och et al., 2000a),
|
| 6 |
+
(Och et al., 2000b).
|
| 7 |
+
========================================================================
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
CONTENTS of this README file:
|
| 12 |
+
|
| 13 |
+
Part I: GIZA++ Package Programs
|
| 14 |
+
Part II: How To Compile GIZA
|
| 15 |
+
Part III: How to Run GIZA
|
| 16 |
+
Part IV: Input File Formats
|
| 17 |
+
A. VOCABULARY FILES
|
| 18 |
+
B. Bitext Files
|
| 19 |
+
C. Dictionary File (optional)
|
| 20 |
+
Part V: Output File Formats:
|
| 21 |
+
A. PROBABILITY TABLES
|
| 22 |
+
1. T TABLE (translation table)
|
| 23 |
+
2. N TABLE (Fertility table)
|
| 24 |
+
3. P0 TABLE
|
| 25 |
+
4. A TABLE
|
| 26 |
+
5. D3 TABLE
|
| 27 |
+
6. D4 TABLE
|
| 28 |
+
7. D5 TABLE
|
| 29 |
+
8. HMM TABLE
|
| 30 |
+
B. ALIGNMENT FILE
|
| 31 |
+
C. Cross Entropy and Perplexity Files
|
| 32 |
+
D. Revised Vocabulary files
|
| 33 |
+
Part VI: Literature
|
| 34 |
+
Part VII: New features
|
| 35 |
+
|
| 36 |
+
HISTORY of this README file:
|
| 37 |
+
|
| 38 |
+
GIZA++:
|
| 39 |
+
edited: 11 Jan. 2000, Franz Josef Och
|
| 40 |
+
GIZA:
|
| 41 |
+
edited: 16 Aug. 1999, Dan Melamed
|
| 42 |
+
edited: 13 Aug. 1999, Yaser Al-Onaizan
|
| 43 |
+
edited: 20 July 1999, Yaser Al-Onaizan
|
| 44 |
+
edited: 15 July 1999, Yaser Al-Onaizan
|
| 45 |
+
edited: 13 July 1999, Noah Smith
|
| 46 |
+
========================================================================
|
| 47 |
+
|
| 48 |
+
Part 0: What is GIZA++
|
| 49 |
+
|
| 50 |
+
GIZA++ is an extension of the program GIZA (part of the SMT toolkit
|
| 51 |
+
EGYPT - http://www.clsp.jhu.edu/ws99/projects/mt/toolkit/ ) which was
|
| 52 |
+
developed by the Statistical Machine Translation team during the
|
| 53 |
+
summer workshop in 1999 at the Center for Language and Speech
|
| 54 |
+
Processing at Johns-Hopkins University (CLSP/JHU). GIZA++ includes a
|
| 55 |
+
lot of additional features. The extensions of GIZA++ were designed and
|
| 56 |
+
written by Franz Josef Och.
|
| 57 |
+
|
| 58 |
+
Features of GIZA++ not in GIZA:
|
| 59 |
+
|
| 60 |
+
- Implements full IBM-4 alignment model with a dependency of word
|
| 61 |
+
classes as described in (Brown et al. 1993)
|
| 62 |
+
|
| 63 |
+
- Implements IBM-5: dependency on word classes, smoothing, ...
|
| 64 |
+
|
| 65 |
+
- Implements HMM alignment model: Baum-Welch training, Forward-Backward
|
| 66 |
+
algorithm, empty word, dependency on word classes, transfer to
|
| 67 |
+
fertility models, ...
|
| 68 |
+
|
| 69 |
+
- Implementation of a variant of the IBM-3 and IBM-4
|
| 70 |
+
(-deficientDistortionModel 1) models which allow the training of -p0
|
| 71 |
+
|
| 72 |
+
- Smoothing for fertility, distortion/alignment parameters
|
| 73 |
+
|
| 74 |
+
- Significantly more efficient training of the fertility models
|
| 75 |
+
|
| 76 |
+
- Correct implementation of pegging as described in (Brown et
|
| 77 |
+
al. 1993), implemented a series of heuristics in order to make pegging
|
| 78 |
+
sufficiently efficient
|
| 79 |
+
|
| 80 |
+
- Completely new parameter mechanism: makes it easy to add additional
|
| 81 |
+
parameters
|
| 82 |
+
|
| 83 |
+
- Improved perplexity calculation for models IBM-1, IBM-2 and HMM (the
|
| 84 |
+
parameter of the Poisson-distribution of the sentence lengths is
|
| 85 |
+
computed automatically from the used training corpus)
|
| 86 |
+
|
| 87 |
+
========================================================================
|
| 88 |
+
Part I: GIZA++ Package Programs
|
| 89 |
+
|
| 90 |
+
GIZA++: GIZA++ itself
|
| 91 |
+
|
| 92 |
+
plain2snt.out: simple tool to transform plain text into GIZA text
|
| 93 |
+
format
|
| 94 |
+
|
| 95 |
+
snt2plain.out: simple tool to transform GIZA text format into plain
|
| 96 |
+
text
|
| 97 |
+
|
| 98 |
+
trainGIZA++.sh: Shell script to perform standard training given a
|
| 99 |
+
corpus in GIZA text format
|
| 100 |
+
|
| 101 |
+
========================================================================
|
| 102 |
+
Part II: How To Compile GIZA++
|
| 103 |
+
|
| 104 |
+
In order to compile GIZA++ you may need:
|
| 105 |
+
- recent version of the GNU compiler (2.95 or higher)
|
| 106 |
+
- recent version of assembler and linker which do not have restrictions
|
| 107 |
+
with respect to the length of symbol names
|
| 108 |
+
|
| 109 |
+
There is a make file in the src directory that will take care of the
|
| 110 |
+
compilation. The most important targets are:
|
| 111 |
+
|
| 112 |
+
GIZA++: generates an optimized version
|
| 113 |
+
|
| 114 |
+
GIZA++.dbg: generates the debug version
|
| 115 |
+
|
| 116 |
+
depend: generates the "dependencies" file (make this whenever you add
|
| 117 |
+
source or header files to the package).
|
| 118 |
+
|
| 119 |
+
========================================================================
|
| 120 |
+
Part III: How To run GIZA++
|
| 121 |
+
|
| 122 |
+
It's simple:
|
| 123 |
+
|
| 124 |
+
GIZA++ [config-file] [options]
|
| 125 |
+
|
| 126 |
+
All options which expect a parameter could also be used in the
|
| 127 |
+
parameter file. For example the command line options
|
| 128 |
+
|
| 129 |
+
GIZA++ -S S.vcb -T T.vcb -C ST.snt
|
| 130 |
+
|
| 131 |
+
corresponds to the config file:
|
| 132 |
+
|
| 133 |
+
S: S.vcb
|
| 134 |
+
T: T.vcb
|
| 135 |
+
C: ST.snt
|
| 136 |
+
|
| 137 |
+
If you call GIZA++ without a parameter you get a list of all the
|
| 138 |
+
options. The option names from GIZA are normally still valid. The
|
| 139 |
+
default values of the parameters typically are optimized with respect
|
| 140 |
+
to the corpora I use and typically give good results. It is
|
| 141 |
+
nevertheless important that these parameters are always optimized for
|
| 142 |
+
every new task.
|
| 143 |
+
|
| 144 |
+
==========================================================================
|
| 145 |
+
Part IV: Input File Formats
|
| 146 |
+
|
| 147 |
+
A. VOCABULARY FILES
|
| 148 |
+
|
| 149 |
+
Each entry is stored on one line as follows:
|
| 150 |
+
|
| 151 |
+
uniq_id1 string1 no_occurrences1
|
| 152 |
+
uniq_id2 string2 no_occurrences2
|
| 153 |
+
uniq_id3 string3 no_occurrences3
|
| 154 |
+
....
|
| 155 |
+
|
| 156 |
+
Here is a sample from an English vocabulary file:
|
| 157 |
+
|
| 158 |
+
627 abandon 10
|
| 159 |
+
628 abandoned 17
|
| 160 |
+
629 abandoning 2
|
| 161 |
+
630 abandonment 12
|
| 162 |
+
631 abatement 8
|
| 163 |
+
632 abbotsford 2
|
| 164 |
+
|
| 165 |
+
uniq_ids are sequential positive integer numbers. 0 is reserved for
|
| 166 |
+
the special token NULL.
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
B. Bitext Files
|
| 170 |
+
|
| 171 |
+
Each sentence pair is stored in three lines. The first line
|
| 172 |
+
is the number of times this sentence pair occurred. The second line is
|
| 173 |
+
the source sentence where each token is replaced by its unique integer
|
| 174 |
+
id from the vocabulary file and the third is the target sentence in
|
| 175 |
+
the same format.
|
| 176 |
+
|
| 177 |
+
Here's a sample of 3 sentences from an English/French corpus:
|
| 178 |
+
|
| 179 |
+
1
|
| 180 |
+
1 1 226 5008 621 6492 226 6377 6813 226 9505 5100 6824 226 5100 5222 0 614 10243 613
|
| 181 |
+
2769 155 7989 585 1 578 6503 585 8242 578 8142 8541 578 12328 6595 8550 578 6595 6710 1
|
| 182 |
+
1
|
| 183 |
+
1 1 226 6260 11856 11806 1293
|
| 184 |
+
11 1 1 11 155 14888 2649 11447 9457 8488 4168
|
| 185 |
+
1
|
| 186 |
+
1 1 226 7652 1 226 5337 226 6940 12089 5582 8076 12050
|
| 187 |
+
1 1 155 4140 6812 153 1 154 155 14668 15616 10524 9954 1392
|
| 188 |
+
|
| 189 |
+
C. Dictionary File
|
| 190 |
+
|
| 191 |
+
This is optional. The dictionary file is of the format:
|
| 192 |
+
|
| 193 |
+
target_word_id source_word_id
|
| 194 |
+
|
| 195 |
+
The list should be sorted by the target_word_id.
|
| 196 |
+
|
| 197 |
+
C. Dictionary Files
|
| 198 |
+
|
| 199 |
+
If you provide a dictionary and list it in the configuration file,
|
| 200 |
+
GIZA++ will change the cooccurrence counting in the first iteration
|
| 201 |
+
of model 1 to honor the so-called "Dictionary Constraint":
|
| 202 |
+
|
| 203 |
+
In parallel sentences "e1 ... en" and "f1 ... fm",
|
| 204 |
+
ei and fi are counted as a cooccurrence pair if one of two
|
| 205 |
+
conditions is met: 1.) ei and fi occur as an entry in the
|
| 206 |
+
dictionary, or 2.) ei does not occur in the dictionary with
|
| 207 |
+
any fj (1 <= j <= m) and fi does not occur in the dictionary
|
| 208 |
+
with any ej (1 <= j <= n).
|
| 209 |
+
|
| 210 |
+
The dictionary must be a list of pairs, one per line:
|
| 211 |
+
|
| 212 |
+
F E
|
| 213 |
+
|
| 214 |
+
where F is an integer of a target token, and E is the integer of a
|
| 215 |
+
source token. F may be listed with other Es, and vice versa.
|
| 216 |
+
|
| 217 |
+
Important: The dictionary must be sorted by the F integers!
|
| 218 |
+
|
| 219 |
+
==========================================================================
|
| 220 |
+
Part V: Output File Formats:
|
| 221 |
+
|
| 222 |
+
For file names, we will use the prefix "prob_table". This can be
|
| 223 |
+
changed using the -o switch. The default is a combination of user id
|
| 224 |
+
and time stamp.
|
| 225 |
+
|
| 226 |
+
|
| 227 |
+
A. PROBABILITY TABLES
|
| 228 |
+
|
| 229 |
+
Normally, Model1 is trained first, and the result is used to start
|
| 230 |
+
Model2 training. Then Model2 is transferred to Model3. Model3 viterbi
|
| 231 |
+
training follows. This sequence can be adjusted by the various
|
| 232 |
+
options, either on the command line or in a config file.
|
| 233 |
+
|
| 234 |
+
1. T TABLE ( *.t3.* )
|
| 235 |
+
|
| 236 |
+
(translation table)
|
| 237 |
+
|
| 238 |
+
prob_table.t1.n = t table after n iterations of Model1 training
|
| 239 |
+
prob_table.t2.n = t table after n iterations of Model2 training
|
| 240 |
+
prob_table.t2to3 = t table after transferring Model2 to Model3
|
| 241 |
+
prob_table.t3.n = t table after n iterations of Model3 training
|
| 242 |
+
prob_table.4.n = t table after n iterations of Model4 training
|
| 243 |
+
|
| 244 |
+
Each line is of the following format:
|
| 245 |
+
|
| 246 |
+
s_id t_id P(t_id/s_id)
|
| 247 |
+
|
| 248 |
+
where:
|
| 249 |
+
s_id: is the unique id for the source token
|
| 250 |
+
t_id: is the unique id for the target token
|
| 251 |
+
P(t_id/s_id) the probability of translating s_id as t_id
|
| 252 |
+
|
| 253 |
+
sample part of a file:
|
| 254 |
+
|
| 255 |
+
3599 5697 0.0628115
|
| 256 |
+
2056 10686 0.000259988
|
| 257 |
+
8227 3738 3.57132e-13
|
| 258 |
+
5141 13720 5.52332e-12
|
| 259 |
+
10798 4102 6.53047e-06
|
| 260 |
+
8227 3750 6.97502e-14
|
| 261 |
+
7712 14080 6.0365e-20
|
| 262 |
+
7712 14082 2.68323e-17
|
| 263 |
+
7713 1083 3.94464e-15
|
| 264 |
+
7712 14084 2.98768e-15
|
| 265 |
+
|
| 266 |
+
Similar files will be generated (with the prefix
|
| 267 |
+
"prob_table.actual.xxx" that has the actual tokens instead of their
|
| 268 |
+
unique ids). This is also true for fertility tables. Also the inverse
|
| 269 |
+
probability table will be generated for the final table and it will
|
| 270 |
+
have the infix "ti" .
|
| 271 |
+
|
| 272 |
+
2. N TABLE ( *.n3.* )
|
| 273 |
+
|
| 274 |
+
(Fertility table)
|
| 275 |
+
|
| 276 |
+
prob_table.n2to3 = n table estimated during the transfer from M2 to M3
|
| 277 |
+
prob_table.n3.X = n table after X iterations of model3
|
| 278 |
+
|
| 279 |
+
Each line in this file is of the following format:
|
| 280 |
+
|
| 281 |
+
source_token_id p0 p1 p2 .... pn
|
| 282 |
+
|
| 283 |
+
where p0 is the probability that the source token has zero fertility;
|
| 284 |
+
p1, fertility one, ...., and n is the maximum possible fertility as
|
| 285 |
+
defined in the program.
|
| 286 |
+
|
| 287 |
+
sample:
|
| 288 |
+
|
| 289 |
+
1 0.475861 0.282418 0.133455 0.0653083 0.0329326 0.00844979 0.0014008
|
| 290 |
+
10 0.249747 0.000107778 0.307767 0.192208 0.0641439 0.15016 0.0358886
|
| 291 |
+
11 0.397111 0.390421 0.19925 0.013382 2.21286e-05 0 0
|
| 292 |
+
12 0.0163432 0.560621 0.374745 0.00231588 0 0 0
|
| 293 |
+
13 1.78045e-07 0.545694 0.299573 0.132127 0.0230494 9.00322e-05 0
|
| 294 |
+
14 1.41918e-18 0.332721 0.300773 0.0334969 0 0 0
|
| 295 |
+
15 0 5.98626e-10 0.47729 0.0230955 0 0 0
|
| 296 |
+
17 0 1.66346e-07 0.895883 0.103948 0 0 0
|
| 297 |
+
|
| 298 |
+
|
| 299 |
+
3. P0 TABLE ( *.p0* )
|
| 300 |
+
|
| 301 |
+
(1 - P0 is the probability of inserting a null after a
|
| 302 |
+
source word.)
|
| 303 |
+
|
| 304 |
+
This file contains only one line with one real number which is the
|
| 305 |
+
value of P0, the probability of not inserting a NULL token.
|
| 306 |
+
|
| 307 |
+
|
| 308 |
+
4. A TABLE ( *.a[23].* )
|
| 309 |
+
|
| 310 |
+
The file names follow the naming conventions above. The format of each
|
| 311 |
+
line is as follows:
|
| 312 |
+
|
| 313 |
+
i j l m p(i | j, l, m)
|
| 314 |
+
|
| 315 |
+
where i, j, l, m are all integers and
|
| 316 |
+
j = position in target sentence
|
| 317 |
+
i = position in source sentence
|
| 318 |
+
l = length of source sentence
|
| 319 |
+
m = length of target sentence
|
| 320 |
+
and p(i/j,l,m) is the probability that a source word in position i is
|
| 321 |
+
moved to position j in a pair of sentences of length l and m.
|
| 322 |
+
|
| 323 |
+
sample:
|
| 324 |
+
|
| 325 |
+
15 14 15 14 0.630798
|
| 326 |
+
15 14 15 15 0.414137
|
| 327 |
+
15 14 15 16 0.268919
|
| 328 |
+
15 14 15 17 0.23171
|
| 329 |
+
15 14 15 18 0.117311
|
| 330 |
+
15 14 15 19 0.119202
|
| 331 |
+
15 14 15 20 0.111369
|
| 332 |
+
15 14 15 21 0.0358169
|
| 333 |
+
|
| 334 |
+
|
| 335 |
+
5. D3 TABLE ( *.d3.* )
|
| 336 |
+
|
| 337 |
+
distortion table
|
| 338 |
+
|
| 339 |
+
The format is similar to the A table with a slight difference --- the
|
| 340 |
+
position of i & j are switched:
|
| 341 |
+
|
| 342 |
+
j i l m p(j/i,l,m)
|
| 343 |
+
|
| 344 |
+
sample:
|
| 345 |
+
|
| 346 |
+
15 14 14 15 0.286397
|
| 347 |
+
15 14 14 16 0.138898
|
| 348 |
+
15 14 14 17 0.109712
|
| 349 |
+
15 14 14 18 0.0868322
|
| 350 |
+
15 14 14 19 0.0535823
|
| 351 |
+
|
| 352 |
+
6. D4 TABLE: ( *.d4.* )
|
| 353 |
+
|
| 354 |
+
distortion table for IBM-4
|
| 355 |
+
|
| 356 |
+
7. D5 TABLE: ( *.d5.* )
|
| 357 |
+
|
| 358 |
+
distortion table for IBM-5
|
| 359 |
+
|
| 360 |
+
8. HMM TABLE: ( *.hhmm.* )
|
| 361 |
+
|
| 362 |
+
alignment probability table for HMM alignment model
|
| 363 |
+
|
| 364 |
+
B. ALIGNMENT FILE ( *.A3.* )
|
| 365 |
+
|
| 366 |
+
In each iteration of the training, and for each sentence pair in the
|
| 367 |
+
training set, the best alignment (viterbi alignment) is written to the
|
| 368 |
+
alignment file (if the dump parameters are set accordingly). The
|
| 369 |
+
alignment file is named prob_table.An.i, where n is the model number
|
| 370 |
+
({1,2, 2to3, 3 or 4}), and i is the iteration number. The format of
|
| 371 |
+
the alignments file is illustrated in the following sample:
|
| 372 |
+
|
| 373 |
+
# Sentence pair (1)
|
| 374 |
+
il s' agit de la même société qui a changé de propriétaires
|
| 375 |
+
NULL ({ }) UNK ({ }) UNK ({ }) ( ({ }) this ({ 4 11 }) is ({ }) the ({ }) same ({ 6 }) agency ({ }) which ({ 8 }) has ({ }) undergone ({ 1 2 3 7 9 10 12 }) a ({ }) change ({ 5 }) of ({ }) UNK ({ })
|
| 376 |
+
# Sentence pair (2)
|
| 377 |
+
UNK UNK , le propriétaire , dit que cela s' est produit si rapidement qu' il n' en connaît pas la cause exacte
|
| 378 |
+
NULL ({ 4 }) UNK ({ 1 2 }) UNK ({ }) , ({ 3 }) the ({ }) owner ({ 5 22 23 }) , ({ 6 }) says ({ 7 8 }) it ({ }) happened ({ 10 11 12 }) so ({ 13 }) fast ({ 14 19 }) he ({ 16 }) is ({ }) not ({ 20 }) sure ({ 15 17 }) what ({ }) went ({ 18 21 }) wrong ({ 9 })
|
| 379 |
+
|
| 380 |
+
The alignment file is represented by three lines for each sentence
|
| 381 |
+
pair. The first line is a label that can be used, e.g., as a caption
|
| 382 |
+
for alignment visualization tools. It contains information about the
|
| 383 |
+
sentence sequential number in the training corpus, sentence lengths,
|
| 384 |
+
and alignment probability. The second line is the target sentence, the
|
| 385 |
+
third line is the source sentence. Each token in the source sentence
|
| 386 |
+
is followed by a set of zero or more numbers. These numbers represent
|
| 387 |
+
the positions of the target words to which this source word is
|
| 388 |
+
connected, according to the alignment.
|
| 389 |
+
|
| 390 |
+
|
| 391 |
+
C. Perplexity File ( *.perp )
|
| 392 |
+
|
| 393 |
+
This file will be generated at the end of training. It summarizes
|
| 394 |
+
perplexity values for each training iteration. Here is a sample
|
| 395 |
+
perplexity file that illustrates the format. The format is the same
|
| 396 |
+
for cross entropy. If no test corpus was provided, the values for it
|
| 397 |
+
will be set to "N/A".
|
| 398 |
+
|
| 399 |
+
# train-size test-size iter. model train-perplexity test-perplexity final(y/n) train-viterbi-perp test-viterbi-perp
|
| 400 |
+
447136 9625 0 1 187067 186722 n 3.34328e+06 3.35352e+06
|
| 401 |
+
447136 9625 1 1 192.88 248.763 n 909.879 1203.13
|
| 402 |
+
447136 9625 2 1 99.45 139.214 n 316.363 459.745
|
| 403 |
+
447136 9625 3 1 83.4746 126.046 n 214.612 341.27
|
| 404 |
+
447136 9625 4 1 78.6939 124.914 n 179.218 303.169
|
| 405 |
+
447136 9625 5 2 76.6848 125.986 n 161.874 286.226
|
| 406 |
+
447136 9625 6 2 50.7452 86.2273 n 84.7227 151.701
|
| 407 |
+
447136 9625 7 2 42.9178 74.5574 n 63.6644 116.034
|
| 408 |
+
447136 9625 8 2 40.0651 70.7444 n 56.3186 104.274
|
| 409 |
+
447136 9625 9 2 38.8471 69.4105 n 53.1277 99.6044
|
| 410 |
+
447136 9625 10 2to3 38.2561 68.9576 n 51.4856 97.4414
|
| 411 |
+
447136 9625 11 3 129.993 248.885 n 86.6675 165.012
|
| 412 |
+
447136 9625 12 3 79.2212 169.902 n 86.4842 171.367
|
| 413 |
+
447136 9625 13 3 75.0746 164.488 n 84.9647 172.639
|
| 414 |
+
447136 9625 14 3 73.412 162.765 n 83.5762 172.797
|
| 415 |
+
447136 9625 15 3 72.6107 162.254 y 82.4575 172.688
|
| 416 |
+
|
| 417 |
+
|
| 418 |
+
D. Revised Vocabulary files (*.src.vcb, *.trg.vcb)
|
| 419 |
+
|
| 420 |
+
The revised vocabulary files are similar in format to the original
|
| 421 |
+
vocabulary files. The only exception is that the frequency for each
|
| 422 |
+
token is calculated from the given corpus (i.e. it is exact), which is
|
| 423 |
+
not required in the input.
|
| 424 |
+
|
| 425 |
+
E. final parameter file: ( *.gizacfg )
|
| 426 |
+
|
| 427 |
+
This file includes all the parameter settings that were used in order
|
| 428 |
+
to perform this training. This means that starting GIZA using this
|
| 429 |
+
parameter file produces (should produce) the same training.
|
| 430 |
+
|
| 431 |
+
|
| 432 |
+
|
| 433 |
+
Part VI: LITERATURE
|
| 434 |
+
-------------------
|
| 435 |
+
|
| 436 |
+
The following two articles include a comparison of the alignment
|
| 437 |
+
models implemented in GIZA++:
|
| 438 |
+
|
| 439 |
+
@INPROCEEDINGS{och00:isa,
|
| 440 |
+
AUTHOR = {F.~J.~Och and H.~Ney},
|
| 441 |
+
TITLE ={Improved Statistical Alignment Models},
|
| 442 |
+
BOOKTITLE = ACL00 ,
|
| 443 |
+
PAGES ={440--447},
|
| 444 |
+
ADDRESS={ Hongkong, China},
|
| 445 |
+
MONTH = {October},
|
| 446 |
+
YEAR = 2000}
|
| 447 |
+
|
| 448 |
+
@INPROCEEDINGS{och00:aco,
|
| 449 |
+
AUTHOR = {F.~J.~Och and H.~Ney},
|
| 450 |
+
TITLE = {A Comparison of Alignment Models for Statistical Machine Translation},
|
| 451 |
+
BOOKTITLE = COLING00,
|
| 452 |
+
ADDRESS = {Saarbr\"ucken, Germany},
|
| 453 |
+
YEAR = {2000},
|
| 454 |
+
MONTH = {August},
|
| 455 |
+
PAGES = {1086--1090}
|
| 456 |
+
}
|
| 457 |
+
|
| 458 |
+
The following article describes the statistical machine translation
|
| 459 |
+
toolkit EGYPT:
|
| 460 |
+
|
| 461 |
+
@MISC{ alonaizan99:smt,
|
| 462 |
+
AUTHOR = {Y. Al-Onaizan and J. Curin and M. Jahr and K. Knight and J. Lafferty and I. D. Melamed and F. J. Och and D. Purdy and N. A. Smith and D. Yarowsky},
|
| 463 |
+
TITLE = {Statistical Machine Translation, Final Report, {JHU} Workshop},
|
| 464 |
+
YEAR = {1999},
|
| 465 |
+
ADDRESS = {Baltimore, Maryland, MD},
|
| 466 |
+
NOTE={{\tt http://www.clsp.jhu.edu/ws99/projects/ mt/final\_report/mt-final-report.ps}}
|
| 467 |
+
}
|
| 468 |
+
|
| 469 |
+
|
| 470 |
+
The implemented alignment models IBM-1 to IBM-5 and HMM were originally described in:
|
| 471 |
+
|
| 472 |
+
@ARTICLE{brown93:tmo,
|
| 473 |
+
AUTHOR = {Brown, P. F. and Della Pietra, S. A. and Della Pietra, V. J. and Mercer, R. L.},
|
| 474 |
+
TITLE = {The Mathematics of Statistical Machine Translation: Parameter Estimation},
|
| 475 |
+
JOURNAL = {Computational Linguistics},
|
| 476 |
+
YEAR = 1993,
|
| 477 |
+
VOLUME = 19,
|
| 478 |
+
NUMBER = 2,
|
| 479 |
+
PAGES = {263--311}
|
| 480 |
+
}
|
| 481 |
+
|
| 482 |
+
@INPROCEEDINGS{ vogel96:hbw,
|
| 483 |
+
AUTHOR = {Vogel, S. and Ney, H. and Tillmann, C.},
|
| 484 |
+
TITLE = {{HMM}-Based Word Alignment in Statistical Translation},
|
| 485 |
+
YEAR = 1996,
|
| 486 |
+
PAGES = {836--841},
|
| 487 |
+
MONTH = {August},
|
| 488 |
+
ADDRESS = {Copenhagen},
|
| 489 |
+
BOOKTITLE = COLING96
|
| 490 |
+
}
|
| 491 |
+
|
| 492 |
+
|
| 493 |
+
Part VII: New features
|
| 494 |
+
======================
|
| 495 |
+
|
| 496 |
+
2003-06-09:
|
| 497 |
+
|
| 498 |
+
- new parameter "-nbestalignments N": prints an N-best list of
|
| 499 |
+
alignments into a file *.NBEST
|
| 500 |
+
|
| 501 |
+
- If program is compiled with "-DBINARY_SEARCH_FOR_TTABLE", it uses
|
| 502 |
+
more memory-efficient data structures for the t table (vector with
|
| 503 |
+
binary search instead of hash table). Then, the program expects a
|
| 504 |
+
parameter "-CoocurrenceFile FILE" which specifies a file which
|
| 505 |
+
includes all lexical cooccurrences in the training corpus. This file
|
| 506 |
+
can be produced by the snt2cooc.out tool.
|
| 507 |
+
|
| 508 |
+
|
tools/giza-pp/GIZA++-v2/TTables.cpp
ADDED
|
@@ -0,0 +1,323 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
EGYPT Toolkit for Statistical Machine Translation
|
| 4 |
+
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
| 5 |
+
|
| 6 |
+
This program is free software; you can redistribute it and/or
|
| 7 |
+
modify it under the terms of the GNU General Public License
|
| 8 |
+
as published by the Free Software Foundation; either version 2
|
| 9 |
+
of the License, or (at your option) any later version.
|
| 10 |
+
|
| 11 |
+
This program is distributed in the hope that it will be useful,
|
| 12 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 13 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 14 |
+
GNU General Public License for more details.
|
| 15 |
+
|
| 16 |
+
You should have received a copy of the GNU General Public License
|
| 17 |
+
along with this program; if not, write to the Free Software
|
| 18 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 19 |
+
USA.
|
| 20 |
+
|
| 21 |
+
*/
|
| 22 |
+
#include "TTables.h"
|
| 23 |
+
#include "Parameter.h"
|
| 24 |
+
|
| 25 |
+
GLOBAL_PARAMETER(float,PROB_CUTOFF,"PROB CUTOFF","Probability cutoff threshold for lexicon probabilities",PARLEV_OPTHEUR,1e-7);
|
| 26 |
+
GLOBAL_PARAMETER2(float, COUNTINCREASE_CUTOFF,"COUNTINCREASE CUTOFF","countCutoff","Counts increment cutoff threshold",PARLEV_OPTHEUR,1e-6);
|
| 27 |
+
|
| 28 |
+
#ifdef BINARY_SEARCH_FOR_TTABLE
|
| 29 |
+
// BINARY_SEARCH_FOR_TTABLE build: dumping the count table is not implemented.
// The body is empty and all four (unnamed) arguments are ignored.
template <class COUNT, class PROB>
|
| 30 |
+
void tmodel<COUNT, PROB>::printCountTable(const char *,
|
| 31 |
+
const Vector<WordEntry>&,
|
| 32 |
+
const Vector<WordEntry>&,
|
| 33 |
+
const bool) const
|
| 34 |
+
{
|
| 35 |
+
}
|
| 36 |
+
|
| 37 |
+
// BINARY_SEARCH_FOR_TTABLE build: writes the translation table to `filename`,
// one entry per line as "e f prob". Here e is the row index into lexmat and f
// is the key (.first) stored in that row's entry. Rows whose lexmat pointer is
// null are skipped, and only entries with prob > PROB_SMOOTH are written.
// When `actual` is true, evlist[e].word and fvlist[f].word are printed instead
// of the numeric ids.
template <class COUNT, class PROB>
|
| 38 |
+
void tmodel<COUNT, PROB>::printProbTable(const char *filename,
|
| 39 |
+
const Vector<WordEntry>& evlist,
|
| 40 |
+
const Vector<WordEntry>& fvlist,
|
| 41 |
+
const bool actual) const
|
| 42 |
+
{
|
| 43 |
+
ofstream of(filename);
|
| 44 |
+
// Disabled alternative implementation that walks the es/fs arrays (no PROB_SMOOTH filter):
/* for(unsigned int i=0;i<es.size()-1;++i)
|
| 45 |
+
for(unsigned int j=es[i];j<es[i+1];++j)
|
| 46 |
+
{
|
| 47 |
+
const CPPair&x=fs[j].second;
|
| 48 |
+
WordIndex e=i,f=fs[j].first;
|
| 49 |
+
if( actual )
|
| 50 |
+
of << evlist[e].word << ' ' << fvlist[f].word << ' ' << x.prob << '\n';
|
| 51 |
+
else
|
| 52 |
+
of << e << ' ' << f << ' ' << x.prob << '\n';
|
| 53 |
+
}*/
|
| 54 |
+
for(unsigned int i=0;i<lexmat.size();++i)
|
| 55 |
+
{
|
| 56 |
+
if( lexmat[i] )
|
| 57 |
+
for(unsigned int j=0;j<lexmat[i]->size();++j)
|
| 58 |
+
{
|
| 59 |
+
const CPPair&x=(*lexmat[i])[j].second;
|
| 60 |
+
WordIndex e=i,f=(*lexmat[i])[j].first;
|
| 61 |
+
if( x.prob>PROB_SMOOTH ) // the else below binds to the inner if( actual ); entries at or below PROB_SMOOTH print nothing
|
| 62 |
+
if( actual )
|
| 63 |
+
of << evlist[e].word << ' ' << fvlist[f].word << ' ' << x.prob << '\n';
|
| 64 |
+
else
|
| 65 |
+
of << e << ' ' << f << ' ' << x.prob << '\n';
|
| 66 |
+
}
|
| 67 |
+
}
|
| 68 |
+
}
|
| 69 |
+
|
| 70 |
+
// BINARY_SEARCH_FOR_TTABLE build: dumping the inverse t table is not
// implemented. The body is empty and all (unnamed) arguments are ignored.
template <class COUNT, class PROB>
|
| 71 |
+
void tmodel<COUNT, PROB>::printProbTableInverse(const char *,
|
| 72 |
+
const Vector<WordEntry>&,
|
| 73 |
+
const Vector<WordEntry>&,
|
| 74 |
+
const double,
|
| 75 |
+
const double,
|
| 76 |
+
const bool ) const
|
| 77 |
+
{
|
| 78 |
+
}
|
| 79 |
+
// BINARY_SEARCH_FOR_TTABLE build: for every non-null row of lexmat, converts
// the accumulated counts into probabilities (count / row total) and then
// resets each count to 0. If a row's counts sum to 0, every entry in that row
// receives the uniform probability 1/rowSize. The three parameters are unnamed
// and unused in this variant.
template <class COUNT, class PROB>
|
| 80 |
+
void tmodel<COUNT, PROB>::normalizeTable(const vcbList&, const vcbList&, int)
|
| 81 |
+
{
|
| 82 |
+
for(unsigned int i=0;i<lexmat.size();++i)
|
| 83 |
+
{
|
| 84 |
+
double c=0.0; // sum of the counts stored in row i
|
| 85 |
+
if( lexmat[i] )
|
| 86 |
+
{
|
| 87 |
+
unsigned int lSize=lexmat[i]->size();
|
| 88 |
+
for(unsigned int j=0;j<lSize;++j)
|
| 89 |
+
c+=(*lexmat[i])[j].second.count;
|
| 90 |
+
for(unsigned int j=0;j<lSize;++j)
|
| 91 |
+
{
|
| 92 |
+
if( c==0 ) // no count mass in this row: fall back to a uniform distribution
|
| 93 |
+
(*lexmat[i])[j].second.prob=1.0/(lSize);
|
| 94 |
+
else
|
| 95 |
+
(*lexmat[i])[j].second.prob=(*lexmat[i])[j].second.count/c;
|
| 96 |
+
(*lexmat[i])[j].second.count=0; // counts are cleared once they have been normalized
|
| 97 |
+
}
|
| 98 |
+
}
|
| 99 |
+
}
|
| 100 |
+
}
|
| 101 |
+
|
| 102 |
+
// BINARY_SEARCH_FOR_TTABLE build: reading a t table from a file is not
// implemented. The body is empty and the file name argument is ignored.
template <class COUNT, class PROB>
|
| 103 |
+
void tmodel<COUNT, PROB>::readProbTable(const char *){
|
| 104 |
+
}
|
| 105 |
+
|
| 106 |
+
// Explicit instantiation of tmodel for the COUNT and PROB types (declared outside this file section),
// so that the member definitions above are generated in this translation unit.
template class tmodel<COUNT,PROB> ;
|
| 107 |
+
#else
|
| 108 |
+
/* ------------------ Method Definitions for Class tmodel --------------------*/
|
| 109 |
+
|
| 110 |
+
#
|
| 111 |
+
template <class COUNT, class PROB>
|
| 112 |
+
void tmodel<COUNT, PROB>::printCountTable(const char *filename,
|
| 113 |
+
const Vector<WordEntry>& evlist,
|
| 114 |
+
const Vector<WordEntry>& fvlist,
|
| 115 |
+
const bool actual) const
|
| 116 |
+
// Dumps the counts of the t table to `filename`. Only entries whose count exceeds COUNTINCREASE_CUTOFF are written. Each line is of the following format:
|
| 117 |
+
//
|
| 118 |
+
// c(target_word/source_word) source_word target_word p(target_word/source_word)
//
// Source and target are printed as ids, or as the words from evlist/fvlist when `actual` is true.
|
| 119 |
+
{
|
| 120 |
+
ofstream of(filename);
|
| 121 |
+
typename hash_map<wordPairIds, CPPair, hashpair, equal_to<wordPairIds> >::const_iterator i;
|
| 122 |
+
for(i = ef.begin(); i != ef.end();++i){
|
| 123 |
+
if ( ((*i).second).count > COUNTINCREASE_CUTOFF) // the else below binds to the inner if (actual)
|
| 124 |
+
if (actual)
|
| 125 |
+
of << ((*i).second).count << ' ' << evlist[ ((*i).first).first ].word << ' ' << fvlist[((*i).first).second].word << ' ' << (*i).second.prob << '\n';
|
| 126 |
+
else
|
| 127 |
+
of << ((*i).second).count << ' ' << ((*i).first).first << ' ' << ((*i).first).second << ' ' << (*i).second.prob << '\n';
|
| 128 |
+
}
|
| 129 |
+
}
|
| 130 |
+
|
| 131 |
+
template <class COUNT, class PROB>
|
| 132 |
+
void tmodel<COUNT, PROB>::printProbTable(const char *filename,
|
| 133 |
+
const Vector<WordEntry>& evlist,
|
| 134 |
+
const Vector<WordEntry>& fvlist,
|
| 135 |
+
const bool actual) const
|
| 136 |
+
// Dumps every entry of the t table to `filename` (no cutoff is applied here). Each line is of the following format:
|
| 137 |
+
//
|
| 138 |
+
// source_word target_word p(target_word/source_word)
//
// Source and target are printed as ids, or as the words from evlist/fvlist when `actual` is true.
|
| 139 |
+
{
|
| 140 |
+
ofstream of(filename);
|
| 141 |
+
typename hash_map<wordPairIds, CPPair, hashpair, equal_to<wordPairIds> >::const_iterator i;
|
| 142 |
+
for(i = ef.begin(); i != ef.end();++i)
|
| 143 |
+
if( actual )
|
| 144 |
+
of << evlist[((*i).first).first].word << ' ' <<
|
| 145 |
+
fvlist[((*i).first).second].word << ' ' << (*i).second.prob << '\n';
|
| 146 |
+
else
|
| 147 |
+
of << ((*i).first).first << ' ' << ((*i).first).second << ' ' <<
|
| 148 |
+
(*i).second.prob << '\n';
|
| 149 |
+
}
|
| 150 |
+
|
| 151 |
+
template <class COUNT, class PROB>
|
| 152 |
+
void tmodel<COUNT, PROB>::printProbTableInverse(const char *filename,
|
| 153 |
+
const Vector<WordEntry>& evlist,
|
| 154 |
+
const Vector<WordEntry>& fvlist,
|
| 155 |
+
const double,
|
| 156 |
+
const double,
|
| 157 |
+
const bool actual) const
|
| 158 |
+
// Dumps the inverse t table to `filename`. Each line is of the format:
|
| 159 |
+
//
|
| 160 |
+
// target_word_id source_word_id p(source_word/target_word)
|
| 161 |
+
//
|
| 162 |
+
// If the flag "actual" is true, the actual word entries are printed instead of
|
| 163 |
+
// token ids.
//
// The inverse probability is computed as P(f/e)*freq(e) / Sum over e' of P(f/e')*freq(e'),
// with freq(e) read from evlist. The two unnamed double parameters are ignored.
// Values outside [0, 1.0001] are reported on cerr (at most 10 reports) but are still written.
// NOTE(review): total[f] is used as a divisor without a zero check - confirm it cannot be 0.
|
| 164 |
+
{
|
| 165 |
+
cerr << "Dumping the t table inverse to file: " << filename << '\n';
|
| 166 |
+
ofstream of(filename);
|
| 167 |
+
typename hash_map<wordPairIds, CPPair, hashpair, equal_to<wordPairIds> >::const_iterator i;
|
| 168 |
+
PROB p_inv = 0 ;
|
| 169 |
+
// static const PROB ratio(double(fTotal)/eTotal);
|
| 170 |
+
WordIndex e, f ;
|
| 171 |
+
int no_errors(0);
|
| 172 |
+
vector<PROB> total(fvlist.size(),PROB(0)) ; // Sum over all e of P(f/e) * p(e) - needed for normalization
|
| 173 |
+
|
| 174 |
+
for(i = ef.begin(); i != ef.end(); i++){ // pass 1: accumulate the normalizer for every target word f
|
| 175 |
+
e = ((*i).first).first ;
|
| 176 |
+
f = ((*i).first).second ;
|
| 177 |
+
total[f] += (PROB) evlist[e].freq * ((*i).second.prob); //add P(f/ei) * F(ei)
|
| 178 |
+
}
|
| 179 |
+
|
| 180 |
+
for(i = ef.begin(); i != ef.end(); i++){ // pass 2: compute, sanity-check and write each inverse probability
|
| 181 |
+
e = ((*i).first).first ;
|
| 182 |
+
f = ((*i).first).second ;
|
| 183 |
+
p_inv = ((*i).second.prob) * (PROB) evlist[e].freq / total[f] ;
|
| 184 |
+
if (p_inv > 1.0001 || p_inv < 0){
|
| 185 |
+
no_errors++;
|
| 186 |
+
if (no_errors <= 10){ // only the first 10 out-of-range values are reported
|
| 187 |
+
cerr << "printProbTableInverse(): Error - P("<<evlist[e].word<<"("<<
|
| 188 |
+
e<<") / "<<fvlist[f].word << "("<<f<<")) = " << p_inv <<'\n';
|
| 189 |
+
cerr << "f(e) = "<<evlist[e].freq << " Sum(p(f/e).f(e)) = " << total[f] <<
|
| 190 |
+
" P(f/e) = " <<((*i).second.prob) <<'\n';
|
| 191 |
+
if (no_errors == 10)
|
| 192 |
+
cerr<<"printProbTableInverse(): Too many P inverse errors ..\n";
|
| 193 |
+
}
|
| 194 |
+
}
|
| 195 |
+
if (actual)
|
| 196 |
+
of << fvlist[f].word << ' ' << evlist[e].word << ' ' << p_inv << '\n';
|
| 197 |
+
else
|
| 198 |
+
of << f << ' ' << e << ' ' << p_inv << '\n';
|
| 199 |
+
}
|
| 200 |
+
}
|
| 201 |
+
/*
|
| 202 |
+
|
| 203 |
+
|
| 204 |
+
|
| 205 |
+
{
|
| 206 |
+
cerr << "Dumping the t table inverse to file: " << filename << '\n';
|
| 207 |
+
ofstream of(filename);
|
| 208 |
+
hash_map<wordPairIds, CPPair, hashpair, equal_to<wordPairIds> >::const_iterator i;
|
| 209 |
+
PROB p_inv = 0 ;
|
| 210 |
+
static const PROB ratio(double(fTotal)/eTotal);
|
| 211 |
+
WordIndex e, f ;
|
| 212 |
+
for(i = ef.begin(); i != ef.end(); i++){
|
| 213 |
+
e = ((*i).first).first ;
|
| 214 |
+
f = ((*i).first).second ;
|
| 215 |
+
p_inv = ((*i).second.prob) * ratio * (PROB) evlist[e].freq /
|
| 216 |
+
(PROB) fvlist[f].freq ;
|
| 217 |
+
if (actual)
|
| 218 |
+
of << fvlist[f].word << ' ' << evlist[e].word << ' ' << p_inv << '\n';
|
| 219 |
+
else
|
| 220 |
+
of << f << ' ' << e << ' ' << p_inv << '\n';
|
| 221 |
+
}
|
| 222 |
+
}
|
| 223 |
+
*/
|
| 224 |
+
template <class COUNT, class PROB>
|
| 225 |
+
void tmodel<COUNT, PROB>::normalizeTable(const vcbList&engl, const vcbList&french, int iter)
|
| 226 |
+
// Normalizes the conditional probability P(fj/ei),
|
| 227 |
+
// i.e. makes sure that the sum over all j of P(fj/e) = 1 (minus the mass reserved for smoothing, see below).
|
| 228 |
+
// This method reads the counts portion of the table and normalizes it into
|
| 229 |
+
// the probability portion. Then the counts are cleared (i.e. zeroed).
|
| 230 |
+
// If the resulting probability of an entry is not above PROB_CUTOFF, the
|
| 231 |
+
// entry is removed.
//
// While iter>0 the normalized value is written back into the count field (prob is set to 0)
// and the function calls itself with iter-1, so the table is renormalized iter+1 times in
// total; only the final pass (iter==0) stores the result in prob and zeroes count.
// total2, nFrench and nEng are not declared here (presumably tmodel members - confirm in TTables.h).
|
| 232 |
+
{
|
| 233 |
+
if( iter==2 )
|
| 234 |
+
{
|
| 235 |
+
total2.resize(engl.uniqTokens());for(unsigned int i=0;i<total2.size();i++)total2[i]=0.0;
|
| 236 |
+
}
|
| 237 |
+
nFrench.resize(engl.uniqTokens());for(unsigned int i=0;i<nFrench.size();i++)nFrench[i]=0;
|
| 238 |
+
nEng.resize(french.uniqTokens());for(unsigned int i=0;i<nEng.size();i++)nEng[i]=0;
|
| 239 |
+
Vector<double> total(engl.uniqTokens(),0.0); // per source word: sum of the counts of all its entries
|
| 240 |
+
//Vector<int> nFrench(engl.uniqTokens(), 0);
|
| 241 |
+
//Vector<int> nEng(french.uniqTokens(), 0);
|
| 242 |
+
|
| 243 |
+
typename hash_map<wordPairIds, CPPair, hashpair, equal_to<wordPairIds> >::const_iterator i;
|
| 244 |
+
for(i = ef.begin(); i != ef.end(); i++){ // for all possible source words e
|
| 245 |
+
if( iter==2 )
|
| 246 |
+
total2[((*i).first).first] += (*i).second.count;
|
| 247 |
+
total[((*i).first).first] += (*i).second.count;
|
| 248 |
+
nFrench[((*i).first).first]++; // number of table entries for this source word
|
| 249 |
+
nEng[((*i).first).second]++; // number of table entries for this target word
|
| 250 |
+
}
|
| 251 |
+
for(unsigned int k=0;k<engl.uniqTokens();++k)
|
| 252 |
+
if( nFrench[k] )
|
| 253 |
+
{
|
| 254 |
+
double probMass=(french.uniqTokensInCorpus()-nFrench[k])*PROB_SMOOTH; // PROB_SMOOTH for each french token that has no entry with k
|
| 255 |
+
if( probMass<0.0 )
|
| 256 |
+
cout << k << " french.uniqTokensInCorpus(): " << french.uniqTokensInCorpus() << " nFrench[k]:"<< nFrench[k] << '\n';
|
| 257 |
+
total[k]+= total[k]*probMass/(1-probMass); // equals total[k]/(1-probMass): stored entries will sum to 1-probMass
|
| 258 |
+
}
|
| 259 |
+
typename hash_map<wordPairIds, CPPair, hashpair, equal_to<wordPairIds> >::iterator j, k;
|
| 260 |
+
PROB p ;
|
| 261 |
+
int nParams=0; // counts the entries that are kept; not read again in this function
|
| 262 |
+
for(j = ef.begin(); j != ef.end(); ){
|
| 263 |
+
k = j; // k is advanced to the successor of j before j's entry may be erased
|
| 264 |
+
k++ ;
|
| 265 |
+
if( (total[((*j).first).first])>0.0 )
|
| 266 |
+
p = ((((*j).second).count) /(total[((*j).first).first])) ;
|
| 267 |
+
else
|
| 268 |
+
p= 0.0;
|
| 269 |
+
if (p > PROB_CUTOFF)
|
| 270 |
+
{
|
| 271 |
+
if( iter>0 ) // intermediate pass: keep the value in count so the recursive call normalizes it again
|
| 272 |
+
{
|
| 273 |
+
((*j).second).prob = 0 ;
|
| 274 |
+
((*j).second).count = p ;
|
| 275 |
+
}
|
| 276 |
+
else // final pass: publish the probability and clear the count
|
| 277 |
+
{
|
| 278 |
+
((*j).second).prob = p ;
|
| 279 |
+
((*j).second).count = 0 ;
|
| 280 |
+
}
|
| 281 |
+
nParams++;
|
| 282 |
+
}
|
| 283 |
+
else {
|
| 284 |
+
erase(((*j).first).first, ((*j).first).second);
|
| 285 |
+
}
|
| 286 |
+
j = k ;
|
| 287 |
+
}
|
| 288 |
+
if( iter>0 )
|
| 289 |
+
return normalizeTable(engl, french, iter-1);
|
| 290 |
+
else
|
| 291 |
+
{
|
| 292 |
+
}
|
| 293 |
+
}
|
| 294 |
+
|
| 295 |
+
template <class COUNT, class PROB>
|
| 296 |
+
void tmodel<COUNT, PROB>::readProbTable(const char *filename){
|
| 297 |
+
/* This function reads the t table from a file.
|
| 298 |
+
Each line is of the format: source_word_id target_word_id p(target_word|source_word)
|
| 299 |
+
This is the inverse operation of the printTable function.
|
| 300 |
+
NAS, 7/11/99
|
| 301 |
+
*/
|
| 302 |
+
ifstream inf(filename);
|
| 303 |
+
cerr << "Reading t prob. table from " << filename << "\n";
|
| 304 |
+
if(!inf){
|
| 305 |
+
cerr << "\nERROR: Cannot open " << filename << "\n";
|
| 306 |
+
return;
|
| 307 |
+
}
|
| 308 |
+
WordIndex src_id, trg_id;
|
| 309 |
+
PROB prob;
|
| 310 |
+
int nEntry=0;
|
| 311 |
+
while( inf >> src_id >> trg_id >> prob){
|
| 312 |
+
insert(src_id, trg_id, 0.0, prob);
|
| 313 |
+
nEntry++;
|
| 314 |
+
}
|
| 315 |
+
cerr << "Read " << nEntry << " entries in prob. table.\n";
|
| 316 |
+
}
|
| 317 |
+
|
| 318 |
+
template class tmodel<COUNT,PROB> ;
|
| 319 |
+
|
| 320 |
+
/* ---------------- End of Method Definitions of class tmodel ---------------*/
|
| 321 |
+
|
| 322 |
+
|
| 323 |
+
#endif
|
tools/giza-pp/GIZA++-v2/TTables.h
ADDED
|
@@ -0,0 +1,417 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
EGYPT Toolkit for Statistical Machine Translation
|
| 4 |
+
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
| 5 |
+
|
| 6 |
+
This program is free software; you can redistribute it and/or
|
| 7 |
+
modify it under the terms of the GNU General Public License
|
| 8 |
+
as published by the Free Software Foundation; either version 2
|
| 9 |
+
of the License, or (at your option) any later version.
|
| 10 |
+
|
| 11 |
+
This program is distributed in the hope that it will be useful,
|
| 12 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 13 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 14 |
+
GNU General Public License for more details.
|
| 15 |
+
|
| 16 |
+
You should have received a copy of the GNU General Public License
|
| 17 |
+
along with this program; if not, write to the Free Software
|
| 18 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 19 |
+
USA.
|
| 20 |
+
|
| 21 |
+
*/
|
| 22 |
+
/* --------------------------------------------------------------------------*
|
| 23 |
+
* *
|
| 24 |
+
* Module : TTables *
|
| 25 |
+
* *
|
| 26 |
+
* Prototypes File: TTables.h *
|
| 27 |
+
* *
|
| 28 |
+
* Objective: Defines classes and methods for handling I/O for Probability & *
|
| 29 |
+
* Count tables and also alignment tables *
|
| 30 |
+
*****************************************************************************/
|
| 31 |
+
|
| 32 |
+
#ifndef _ttables_h
|
| 33 |
+
#define _ttables_h 1
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
#include "defs.h"
|
| 37 |
+
#include "vocab.h"
|
| 38 |
+
|
| 39 |
+
#include <cassert>
|
| 40 |
+
|
| 41 |
+
#include <iostream>
|
| 42 |
+
#include <algorithm>
|
| 43 |
+
#include <functional>
|
| 44 |
+
#include <map>
|
| 45 |
+
#include <set>
|
| 46 |
+
#include "Vector.h"
|
| 47 |
+
#include <utility>
|
| 48 |
+
|
| 49 |
+
#include <fstream>
|
| 50 |
+
|
| 51 |
+
#include "Globals.h"
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
/* The tables defined in the following classes are defined as hash tables. For
|
| 55 |
+
example, the t-table is a hash function of a word pair; an alignment is
|
| 56 |
+
a hash function of a vector of integer numbers (sentence positions) and so
|
| 57 |
+
on */
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
/*----------- Definition of Hash Function for class tmodel ------- -----------*/
|
| 61 |
+
|
| 62 |
+
typedef pair<WordIndex, WordIndex> wordPairIds;
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
class hashpair : public unary_function< pair<WordIndex, WordIndex>, size_t >
|
| 66 |
+
{
|
| 67 |
+
public:
|
| 68 |
+
size_t operator() (const pair<WordIndex, WordIndex>& key) const
|
| 69 |
+
{
|
| 70 |
+
return (size_t) MAX_W*key.first + key.second; /* hash function and it
|
| 71 |
+
is guarnteed to have
|
| 72 |
+
unique id for each
|
| 73 |
+
unique pair */
|
| 74 |
+
}
|
| 75 |
+
};
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
/* ------------------ Class Prototype Definitions ---------------------------*
|
| 80 |
+
Class Name: tmodel
|
| 81 |
+
Objective: This defines the underlying data structure for t Tables and t
|
| 82 |
+
Count Tables. They are defined as a hash table. Each entry in the hash table
|
| 83 |
+
is the probability (P(fj/ei) ) or count collected for ( C(fj/ei)). The
|
| 84 |
+
probability and the count are represented as log integer probability as
|
| 85 |
+
defined by the class LogProb .
|
| 86 |
+
|
| 87 |
+
This class is used to represent t Tables (probability) and n (fertility)
|
| 88 |
+
Tables and also their corresponding count tables .
|
| 89 |
+
|
| 90 |
+
*---------------------------------------------------------------------------*/
|
| 91 |
+
|
| 92 |
+
//typedef float COUNT ;
|
| 93 |
+
//typedef LogProb PROB ;
|
| 94 |
+
// Value stored for one word pair in the t-table: the count collected for the
// pair together with its probability.
template <class COUNT, class PROB>
class LpPair {
 public:
  COUNT count;  // count part of the entry
  PROB prob;    // probability part of the entry

  // An empty entry: both parts are zero.
  LpPair()
    : count(0), prob(0)
  {}

  // An entry holding the given count and probability.
  LpPair(COUNT initialCount, PROB initialProb)
    : count(initialCount), prob(initialProb)
  {}
};
|
| 103 |
+
|
| 104 |
+
#ifdef BINARY_SEARCH_FOR_TTABLE
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
// Look for an element whose `first` member equals val in the range [x, y),
// which is expected to be ordered by `first`.  Returns a pointer to the
// matching element, or 0 when there is none.
//
// Each round tests the left end of the remaining range and then keeps the
// half that can still contain val.
template<class T>
T*mbinary_search(T*x,T*y,unsigned int val)
{
  while( y-x!=0 )
    {
      if( x->first==val )
        return x;
      if( y-x<2 )
        break;              // single element left and it did not match
      T*middle=x+(y-x)/2;
      if( val < middle->first )
        y=middle;           // continue in [x, middle)
      else
        x=middle;           // continue in [middle, y)
    }
  return 0;
}
|
| 123 |
+
|
| 124 |
+
// Read-only variant: look for an element whose `first` member equals val in
// the range [x, y), which is expected to be ordered by `first`.  Returns a
// pointer to the matching element, or 0 when there is none.
//
// Each round tests the left end of the remaining range and then keeps the
// half that can still contain val.
template<class T>
const T*mbinary_search(const T*x,const T*y,unsigned int val)
{
  while( y-x!=0 )
    {
      if( x->first==val )
        return x;
      if( y-x<2 )
        break;              // single element left and it did not match
      const T*middle=x+(y-x)/2;
      if( val < middle->first )
        y=middle;           // continue in [x, middle)
      else
        x=middle;           // continue in [middle, y)
    }
  return 0;
}
|
| 140 |
+
|
| 141 |
+
template <class COUNT, class PROB>
|
| 142 |
+
class tmodel{
|
| 143 |
+
typedef LpPair<COUNT, PROB> CPPair;
|
| 144 |
+
public:
|
| 145 |
+
int noEnglishWords; // total number of unique source words
|
| 146 |
+
int noFrenchWords; // total number of unique target words
|
| 147 |
+
//vector<pair<unsigned int,CPPair> > fs;
|
| 148 |
+
//vector<unsigned int> es;
|
| 149 |
+
vector< vector<pair<unsigned int,CPPair> >* > lexmat;
|
| 150 |
+
|
| 151 |
+
void erase(WordIndex e, WordIndex f)
|
| 152 |
+
{
|
| 153 |
+
CPPair *p=find(e,f);
|
| 154 |
+
if(p)
|
| 155 |
+
*p=CPPair(0,0);
|
| 156 |
+
};
|
| 157 |
+
CPPair*find(int e,int f)
|
| 158 |
+
{
|
| 159 |
+
//pair<unsigned int,CPPair> *be=&(fs[0])+es[e];
|
| 160 |
+
//pair<unsigned int,CPPair> *en=&(fs[0])+es[e+1];
|
| 161 |
+
pair<unsigned int,CPPair> *be=&(*lexmat[e])[0];
|
| 162 |
+
pair<unsigned int,CPPair> *en=&(*lexmat[e])[0]+(*lexmat[e]).size();
|
| 163 |
+
pair<unsigned int,CPPair> *x= mbinary_search(be,en,f);
|
| 164 |
+
if( x==0 )
|
| 165 |
+
{
|
| 166 |
+
//cerr << "A:DID NOT FIND ENTRY: " << e << " " << f << '\n';
|
| 167 |
+
//abort();
|
| 168 |
+
return 0;
|
| 169 |
+
}
|
| 170 |
+
return &(x->second);
|
| 171 |
+
}
|
| 172 |
+
const CPPair*find(int e,int f)const
|
| 173 |
+
{
|
| 174 |
+
const pair<unsigned int,CPPair> *be=&(*lexmat[e])[0];
|
| 175 |
+
const pair<unsigned int,CPPair> *en=&(*lexmat[e])[0]+(*lexmat[e]).size();
|
| 176 |
+
//const pair<unsigned int,CPPair> *be=&(fs[0])+es[e];
|
| 177 |
+
//const pair<unsigned int,CPPair> *en=&(fs[0])+es[e+1];
|
| 178 |
+
const pair<unsigned int,CPPair> *x= mbinary_search(be,en,f);
|
| 179 |
+
if( x==0 )
|
| 180 |
+
{
|
| 181 |
+
//cerr << "B:DID NOT FIND ENTRY: " << e << " " << f << '\n';
|
| 182 |
+
//abort();
|
| 183 |
+
return 0;
|
| 184 |
+
}
|
| 185 |
+
|
| 186 |
+
return &(x->second);
|
| 187 |
+
}
|
| 188 |
+
public:
|
| 189 |
+
void insert(WordIndex e, WordIndex f, COUNT cval=0.0, PROB pval = 0.0){
|
| 190 |
+
*find(e,f)=CPPair(cval,pval);
|
| 191 |
+
}
|
| 192 |
+
CPPair*getPtr(int e,int f){return find(e,f);}
|
| 193 |
+
tmodel(const string&fn)
|
| 194 |
+
{
|
| 195 |
+
int count=0,count2=0;
|
| 196 |
+
ifstream infile2(fn.c_str());
|
| 197 |
+
int e,f,olde=-1,oldf=-1;
|
| 198 |
+
pair<unsigned int,CPPair> cp;
|
| 199 |
+
vector< pair<unsigned int,CPPair> > cps;
|
| 200 |
+
while(infile2>>e>>f)
|
| 201 |
+
{
|
| 202 |
+
cp.first=f;
|
| 203 |
+
assert(e>=olde);
|
| 204 |
+
assert(e>olde ||f>oldf);
|
| 205 |
+
if( e!=olde&&olde>=0 )
|
| 206 |
+
{
|
| 207 |
+
int oldsize=lexmat.size();
|
| 208 |
+
lexmat.resize(olde+1);
|
| 209 |
+
for(unsigned int i=oldsize;i<lexmat.size();++i)
|
| 210 |
+
lexmat[i]=0;
|
| 211 |
+
lexmat[olde]=new vector< pair<unsigned int,CPPair> > (cps);
|
| 212 |
+
cps.clear();
|
| 213 |
+
if( !((*lexmat[olde]).size()==(*lexmat[olde]).capacity()) )
|
| 214 |
+
cerr << "eRROR: waste of memory: " << (*lexmat[olde]).size() << " " << (*lexmat[olde]).capacity() << endl;
|
| 215 |
+
count2+=lexmat[olde]->capacity();
|
| 216 |
+
}
|
| 217 |
+
cps.push_back(cp);
|
| 218 |
+
olde=e;
|
| 219 |
+
oldf=f;
|
| 220 |
+
count++;
|
| 221 |
+
}
|
| 222 |
+
lexmat.resize(olde+1);
|
| 223 |
+
lexmat[olde]=new vector< pair<unsigned int,CPPair> > (cps);
|
| 224 |
+
count2+=lexmat[olde]->capacity();
|
| 225 |
+
cout << "There are " << count << " " << count2 << " entries in table" << '\n';
|
| 226 |
+
}
|
| 227 |
+
|
| 228 |
+
|
| 229 |
+
/* tmodel(const string&fn)
|
| 230 |
+
{
|
| 231 |
+
size_t count=0;
|
| 232 |
+
{
|
| 233 |
+
ifstream infile1(fn.c_str());
|
| 234 |
+
if( !infile1 )
|
| 235 |
+
{
|
| 236 |
+
cerr << "ERROR: can't read coocurrence file " << fn << '\n';
|
| 237 |
+
abort();
|
| 238 |
+
}
|
| 239 |
+
int e,f;
|
| 240 |
+
while(infile1>>e>>f)
|
| 241 |
+
count++;
|
| 242 |
+
}
|
| 243 |
+
cout << "There are " << count << " entries in table" << '\n';
|
| 244 |
+
ifstream infile2(fn.c_str());
|
| 245 |
+
fs.resize(count);
|
| 246 |
+
int e,f,olde=-1,oldf=-1;
|
| 247 |
+
pair<unsigned int,CPPair> cp;
|
| 248 |
+
count=0;
|
| 249 |
+
while(infile2>>e>>f)
|
| 250 |
+
{
|
| 251 |
+
assert(e>=olde);
|
| 252 |
+
assert(e>olde ||f>oldf);
|
| 253 |
+
if( e!=olde )
|
| 254 |
+
{
|
| 255 |
+
es.resize(e+1);
|
| 256 |
+
for(unsigned int i=olde+1;int(i)<=e;++i)
|
| 257 |
+
es[i]=count;
|
| 258 |
+
}
|
| 259 |
+
cp.first=f;
|
| 260 |
+
assert(count<fs.size());
|
| 261 |
+
fs[count]=cp;
|
| 262 |
+
//fs.push_back(cp);
|
| 263 |
+
olde=e;
|
| 264 |
+
oldf=f;
|
| 265 |
+
count++;
|
| 266 |
+
}
|
| 267 |
+
assert(count==fs.size());
|
| 268 |
+
es.push_back(fs.size());
|
| 269 |
+
cout << fs.size() << " " << count << " coocurrences read" << '\n';
|
| 270 |
+
}*/
|
| 271 |
+
void incCount(WordIndex e, WordIndex f, COUNT inc)
|
| 272 |
+
{
|
| 273 |
+
if( inc )
|
| 274 |
+
{
|
| 275 |
+
CPPair *p=find(e,f);
|
| 276 |
+
if( p )
|
| 277 |
+
p->count += inc ;
|
| 278 |
+
}
|
| 279 |
+
}
|
| 280 |
+
|
| 281 |
+
PROB getProb(WordIndex e, WordIndex f) const
|
| 282 |
+
{
|
| 283 |
+
const CPPair *p=find(e,f);
|
| 284 |
+
if( p )
|
| 285 |
+
return max(p->prob, PROB_SMOOTH);
|
| 286 |
+
else
|
| 287 |
+
return PROB_SMOOTH;
|
| 288 |
+
}
|
| 289 |
+
|
| 290 |
+
COUNT getCount(WordIndex e, WordIndex f) const
|
| 291 |
+
{
|
| 292 |
+
const CPPair *p=find(e,f);
|
| 293 |
+
if( p )
|
| 294 |
+
return p->count;
|
| 295 |
+
else
|
| 296 |
+
return 0.0;
|
| 297 |
+
}
|
| 298 |
+
|
| 299 |
+
void printProbTable(const char* filename, const Vector<WordEntry>&, const Vector<WordEntry>&,bool actual) const;
|
| 300 |
+
void printCountTable(const char* filename, const Vector<WordEntry>&, const Vector<WordEntry>&,bool actual) const;
|
| 301 |
+
void printProbTableInverse(const char *filename,
|
| 302 |
+
const Vector<WordEntry>& evlist,
|
| 303 |
+
const Vector<WordEntry>& fvlist,
|
| 304 |
+
const double eTotal,
|
| 305 |
+
const double fTotal,
|
| 306 |
+
const bool actual = false ) const;
|
| 307 |
+
void normalizeTable(const vcbList&engl, const vcbList&french, int iter=2);
|
| 308 |
+
void readProbTable(const char *filename);
|
| 309 |
+
};
|
| 310 |
+
|
| 311 |
+
|
| 312 |
+
#else
|
| 313 |
+
|
| 314 |
+
|
| 315 |
+
template <class COUNT, class PROB>
|
| 316 |
+
class tmodel{
|
| 317 |
+
typedef LpPair<COUNT, PROB> CPPair;
|
| 318 |
+
public:
|
| 319 |
+
int noEnglishWords; // total number of unique source words
|
| 320 |
+
int noFrenchWords; // total number of unique target words
|
| 321 |
+
hash_map<wordPairIds, CPPair, hashpair, equal_to<wordPairIds> > ef;
|
| 322 |
+
void erase(WordIndex e, WordIndex f)
|
| 323 |
+
// In: a source and a target token ids.
|
| 324 |
+
// removes the entry with that pair from table
|
| 325 |
+
{
|
| 326 |
+
ef.erase(wordPairIds(e, f));
|
| 327 |
+
};
|
| 328 |
+
|
| 329 |
+
public:
|
| 330 |
+
Vector<PROB> total2;
|
| 331 |
+
Vector<int> nFrench;
|
| 332 |
+
Vector<int> nEng;
|
| 333 |
+
|
| 334 |
+
|
| 335 |
+
// methods;
|
| 336 |
+
|
| 337 |
+
// insert: add entry P(fj/ei) to the hash function, Default value is 0.0
|
| 338 |
+
void insert(WordIndex e, WordIndex f, COUNT cval=0.0, PROB pval = 0.0){
|
| 339 |
+
ef[wordPairIds(e, f)].count = cval ;
|
| 340 |
+
ef[wordPairIds(e, f)].prob = pval ;
|
| 341 |
+
}
|
| 342 |
+
|
| 343 |
+
// returns a reference to the word pair, if does not exists, it creates it.
|
| 344 |
+
CPPair&getRe(WordIndex e, WordIndex f)
|
| 345 |
+
{return ef[wordPairIds(e, f)];}
|
| 346 |
+
|
| 347 |
+
// returns a pointer to an existing word pair. if pair does not exists,
|
| 348 |
+
// the method returns the zero pointer (NULL)
|
| 349 |
+
|
| 350 |
+
CPPair*getPtr(WordIndex e, WordIndex f)
|
| 351 |
+
{
|
| 352 |
+
// look up this pair and return its position
|
| 353 |
+
typename hash_map<wordPairIds, CPPair, hashpair, equal_to<wordPairIds> >::iterator i = ef.find(wordPairIds(e, f));
|
| 354 |
+
if(i != ef.end()) // if it exists, return a pointer to it.
|
| 355 |
+
return(&((*i).second));
|
| 356 |
+
else return(0) ; // else return NULL pointer
|
| 357 |
+
}
|
| 358 |
+
|
| 359 |
+
void incCount(WordIndex e, WordIndex f, COUNT inc)
|
| 360 |
+
// increments the count of the given word pair. if the pair does not exist,
|
| 361 |
+
// it creates it with the given value.
|
| 362 |
+
{
|
| 363 |
+
if( inc )
|
| 364 |
+
ef[wordPairIds(e, f)].count += inc ;
|
| 365 |
+
}
|
| 366 |
+
|
| 367 |
+
PROB getProb(WordIndex e, WordIndex f) const
|
| 368 |
+
// read probability value for P(fj/ei) from the hash table
|
| 369 |
+
// if pair does not exist, return floor value PROB_SMOOTH
|
| 370 |
+
{
|
| 371 |
+
typename hash_map<wordPairIds, CPPair, hashpair, equal_to<wordPairIds> >::const_iterator i= ef.find(wordPairIds(e, f));
|
| 372 |
+
if(i == ef.end())
|
| 373 |
+
return PROB_SMOOTH;
|
| 374 |
+
else
|
| 375 |
+
return max(((*i).second).prob, PROB_SMOOTH);
|
| 376 |
+
}
|
| 377 |
+
|
| 378 |
+
COUNT getCount(WordIndex e, WordIndex f) const
|
| 379 |
+
/* read count value for entry pair (fj/ei) from the hash table */
|
| 380 |
+
{
|
| 381 |
+
typename hash_map<wordPairIds, CPPair, hashpair, equal_to<wordPairIds> >::const_iterator i= ef.find(wordPairIds(e, f));
|
| 382 |
+
if(i == ef.end())
|
| 383 |
+
return 0;
|
| 384 |
+
else
|
| 385 |
+
return ((*i).second).count;
|
| 386 |
+
}
|
| 387 |
+
|
| 388 |
+
inline const hash_map<wordPairIds, CPPair, hashpair, equal_to<wordPairIds> >& getHash(void) const {return ef;};
|
| 389 |
+
/* get a refernece to the hash table */
|
| 390 |
+
//inline void resize(WordIndex n) {ef.resize(n);};
|
| 391 |
+
// to resize he hash table
|
| 392 |
+
|
| 393 |
+
void printProbTable(const char* filename, const Vector<WordEntry>&, const Vector<WordEntry>&,bool actual) const;
|
| 394 |
+
void printCountTable(const char* filename, const Vector<WordEntry>&, const Vector<WordEntry>&,bool actual) const;
|
| 395 |
+
// print the t table to the given file but this time print actual source and
|
| 396 |
+
// target words instead of thier token ids
|
| 397 |
+
|
| 398 |
+
void printProbTableInverse(const char *filename,
|
| 399 |
+
const Vector<WordEntry>& evlist,
|
| 400 |
+
const Vector<WordEntry>& fvlist,
|
| 401 |
+
const double eTotal,
|
| 402 |
+
const double fTotal,
|
| 403 |
+
const bool actual = false ) const;
|
| 404 |
+
// dump inverse of t table (i.e P(ei/fj)) to the given file name,
|
| 405 |
+
// if the given flag is true then actual words are printed not token ids
|
| 406 |
+
|
| 407 |
+
void normalizeTable(const vcbList&engl, const vcbList&french, int iter=2);
|
| 408 |
+
// to norlmalize the table i.e. make sure P(fj/ei) for all j is equal to 1
|
| 409 |
+
|
| 410 |
+
void readProbTable(const char *filename);
|
| 411 |
+
// void readAsFertilityTable(const char *filename);
|
| 412 |
+
};
|
| 413 |
+
/*--------------- End of Class Definition for tmodel -----------------------*/
|
| 414 |
+
|
| 415 |
+
#endif
|
| 416 |
+
|
| 417 |
+
#endif
|
tools/giza-pp/GIZA++-v2/Vector.h
ADDED
|
@@ -0,0 +1,427 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
EGYPT Toolkit for Statistical Machine Translation
|
| 4 |
+
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
| 5 |
+
|
| 6 |
+
This program is free software; you can redistribute it and/or
|
| 7 |
+
modify it under the terms of the GNU General Public License
|
| 8 |
+
as published by the Free Software Foundation; either version 2
|
| 9 |
+
of the License, or (at your option) any later version.
|
| 10 |
+
|
| 11 |
+
This program is distributed in the hope that it will be useful,
|
| 12 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 13 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 14 |
+
GNU General Public License for more details.
|
| 15 |
+
|
| 16 |
+
You should have received a copy of the GNU General Public License
|
| 17 |
+
along with this program; if not, write to the Free Software
|
| 18 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 19 |
+
USA.
|
| 20 |
+
|
| 21 |
+
*/
|
| 22 |
+
/*--
|
| 23 |
+
Vector: checked vector implementation
|
| 24 |
+
|
| 25 |
+
Franz Josef Och (30/07/99)
|
| 26 |
+
--*/
|
| 27 |
+
#ifndef ARRAY_H_DEFINED
|
| 28 |
+
#define ARRAY_H_DEFINED
|
| 29 |
+
#include "mystl.h"
|
| 30 |
+
#include <algorithm>
|
| 31 |
+
#include <string>
|
| 32 |
+
#include <utility>
|
| 33 |
+
#include <functional>
|
| 34 |
+
#include <cassert>
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
#ifdef NDEBUG
|
| 38 |
+
|
| 39 |
+
#include <vector>
|
| 40 |
+
#define Vector vector
|
| 41 |
+
// Write a vector as "Vector(<size>){ " followed by " <index>: <value> ;" for
// every element and a closing "}" plus newline.
template<class T> ostream& operator<<(ostream&o, const Vector<T>&a)
{
  o << "Vector(" << a.size() << "){ ";
  unsigned int pos=0;
  while( pos<a.size() )
    {
      o << " " << pos << ": " << a[pos] << " ;";
      pos++;
    }
  o << "}\n";
  return o;
}
|
| 48 |
+
|
| 49 |
+
#else
|
| 50 |
+
|
| 51 |
+
#define ARRAY_DEBUG
|
| 52 |
+
#define memo_del(a, b)
|
| 53 |
+
#define memo_new(a)
|
| 54 |
+
|
| 55 |
+
template<class T> class Vector
|
| 56 |
+
{
|
| 57 |
+
private:
|
| 58 |
+
T *p;
|
| 59 |
+
int realSize;
|
| 60 |
+
int maxWritten;
|
| 61 |
+
|
| 62 |
+
void copy(T *a, const T *b, int n);
|
| 63 |
+
void copy(T *a, T *b, int n);
|
| 64 |
+
void _expand();
|
| 65 |
+
|
| 66 |
+
public:
|
| 67 |
+
Vector()
|
| 68 |
+
: p(0), realSize(0), maxWritten(-1)
|
| 69 |
+
{
|
| 70 |
+
#ifdef VERY_ARRAY_DEBUG
|
| 71 |
+
cout << "MAKE ARRAY: " << this<<" "<<(void*)p << '\n';
|
| 72 |
+
#endif
|
| 73 |
+
}
|
| 74 |
+
Vector(const Vector<T> &x)
|
| 75 |
+
: p(new T[x.maxWritten+1]), realSize(x.maxWritten+1), maxWritten(x.maxWritten)
|
| 76 |
+
{
|
| 77 |
+
memo_new(p);
|
| 78 |
+
copy(p, x.p, realSize);
|
| 79 |
+
#ifdef VERY_ARRAY_DEBUG
|
| 80 |
+
cout << "MAKE ARRAY copy: " << this << " " << realSize <<" "<<(void*)p<< '\n';
|
| 81 |
+
#endif
|
| 82 |
+
}
|
| 83 |
+
explicit Vector(int n)
|
| 84 |
+
: p(new T[n]), realSize(n), maxWritten(n-1)
|
| 85 |
+
{
|
| 86 |
+
memo_new(p);
|
| 87 |
+
#ifdef VERY_ARRAY_DEBUG
|
| 88 |
+
cout << "MAKE ARRAY with parameter n: " << this << " " << realSize<<" "<<(void*)p << '\n';
|
| 89 |
+
#endif
|
| 90 |
+
}
|
| 91 |
+
Vector(int n, const T&_init)
|
| 92 |
+
: p(new T[n]), realSize(n), maxWritten(n-1)
|
| 93 |
+
{
|
| 94 |
+
memo_new(p);
|
| 95 |
+
for(int iii=0;iii<n;iii++)p[iii]=_init;
|
| 96 |
+
#ifdef VERY_ARRAY_DEBUG
|
| 97 |
+
cout << "MAKE ARRAY with parameter n and init: " << this << " " << realSize<<" "<<(void*)p << '\n';
|
| 98 |
+
#endif
|
| 99 |
+
}
|
| 100 |
+
|
| 101 |
+
~Vector()
|
| 102 |
+
{
|
| 103 |
+
#ifdef VERY_ARRAY_DEBUG
|
| 104 |
+
cout << "FREE ARRAY: " << this << " " << realSize<<" "<<(void*)p << '\n';
|
| 105 |
+
#endif
|
| 106 |
+
delete [] p;
|
| 107 |
+
memo_del(p, 1);
|
| 108 |
+
#ifndef NDEBUG
|
| 109 |
+
p=0;realSize=-1;maxWritten=-1;
|
| 110 |
+
#endif
|
| 111 |
+
}
|
| 112 |
+
|
| 113 |
+
Vector<T>& operator=(const Vector<T>&x)
|
| 114 |
+
{
|
| 115 |
+
if( this!= &x )
|
| 116 |
+
{
|
| 117 |
+
#ifdef VERY_ARRAY_DEBUG
|
| 118 |
+
cout << "FREE ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << '\n';
|
| 119 |
+
#endif
|
| 120 |
+
delete [] p;
|
| 121 |
+
memo_del(p, 1);
|
| 122 |
+
realSize = x.maxWritten+1;
|
| 123 |
+
maxWritten = x.maxWritten;
|
| 124 |
+
p = new T[realSize];
|
| 125 |
+
memo_new(p);
|
| 126 |
+
copy(p, x.p, realSize);
|
| 127 |
+
#ifdef VERY_ARRAY_DEBUG
|
| 128 |
+
cout << "NEW ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << '\n';
|
| 129 |
+
#endif
|
| 130 |
+
}
|
| 131 |
+
return *this;
|
| 132 |
+
}
|
| 133 |
+
|
| 134 |
+
Vector<T>& operator=(Vector<T>&x)
|
| 135 |
+
{
|
| 136 |
+
if( this!= &x )
|
| 137 |
+
{
|
| 138 |
+
#ifdef VERY_ARRAY_DEBUG
|
| 139 |
+
cout << "FREE ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << '\n';
|
| 140 |
+
#endif
|
| 141 |
+
delete [] p;
|
| 142 |
+
memo_del(p, 1);
|
| 143 |
+
realSize = x.maxWritten+1;
|
| 144 |
+
maxWritten = x.maxWritten;
|
| 145 |
+
p = new T[realSize];
|
| 146 |
+
memo_new(p);
|
| 147 |
+
copy(p, x.p, realSize);
|
| 148 |
+
#ifdef VERY_ARRAY_DEBUG
|
| 149 |
+
cout << "NEW ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << '\n';
|
| 150 |
+
#endif
|
| 151 |
+
}
|
| 152 |
+
return *this;
|
| 153 |
+
}
|
| 154 |
+
|
| 155 |
+
void allowAccess(int n)
|
| 156 |
+
{
|
| 157 |
+
while( realSize<=n )
|
| 158 |
+
_expand();
|
| 159 |
+
maxWritten=max(maxWritten, n);
|
| 160 |
+
assert( maxWritten<realSize );
|
| 161 |
+
}
|
| 162 |
+
void resize(int n)
|
| 163 |
+
{
|
| 164 |
+
while( realSize<n )
|
| 165 |
+
_expand();
|
| 166 |
+
maxWritten=n-1;
|
| 167 |
+
}
|
| 168 |
+
void clear()
|
| 169 |
+
{
|
| 170 |
+
resize(0);
|
| 171 |
+
}
|
| 172 |
+
void reserve(int n)
|
| 173 |
+
{
|
| 174 |
+
int maxOld=maxWritten;
|
| 175 |
+
resize(n);
|
| 176 |
+
maxWritten=maxOld;
|
| 177 |
+
}
|
| 178 |
+
void sort(int until=-1)
|
| 179 |
+
{
|
| 180 |
+
if( until== -1 ) until=size();
|
| 181 |
+
std::sort(p, p+until);
|
| 182 |
+
}
|
| 183 |
+
void invsort(int until=-1)
|
| 184 |
+
{
|
| 185 |
+
if( until== -1 ) until=size();
|
| 186 |
+
std::sort(p, p+until, greater<T>());
|
| 187 |
+
}
|
| 188 |
+
void init(int n, const T&_init)
|
| 189 |
+
{
|
| 190 |
+
#ifdef VERY_ARRAY_DEBUG
|
| 191 |
+
cout << "FREE ARRAY because of init: " << this << " " << realSize<<" "<<(void*)p << '\n';
|
| 192 |
+
#endif
|
| 193 |
+
delete []p;
|
| 194 |
+
memo_del(p, 1);
|
| 195 |
+
p=new T[n];
|
| 196 |
+
memo_new(p);
|
| 197 |
+
realSize=n;
|
| 198 |
+
maxWritten=n-1;
|
| 199 |
+
for(int iii=0;iii<n;iii++)p[iii]=_init;
|
| 200 |
+
#ifdef VERY_ARRAY_DEBUG
|
| 201 |
+
cout << "NEW ARRAY because of init: " << this << " " << realSize<<" "<<(void*)p << '\n';
|
| 202 |
+
#endif
|
| 203 |
+
}
|
| 204 |
+
inline unsigned int size() const
|
| 205 |
+
{assert( maxWritten<realSize );
|
| 206 |
+
return maxWritten+1;}
|
| 207 |
+
inline int low() const
|
| 208 |
+
{ return 0; }
|
| 209 |
+
inline int high() const
|
| 210 |
+
{ return maxWritten; }
|
| 211 |
+
int findMax() const;
|
| 212 |
+
int findMin() const;
|
| 213 |
+
void errorAccess(int n) const;
|
| 214 |
+
inline T*getPointerToData(){return p;}
|
| 215 |
+
inline T*begin(){return p;}
|
| 216 |
+
inline T*end(){return p+maxWritten+1;}
|
| 217 |
+
inline T& operator[](int n)
|
| 218 |
+
{
|
| 219 |
+
#ifndef NDEBUG
|
| 220 |
+
if( n<0 || n>maxWritten )
|
| 221 |
+
errorAccess(n);
|
| 222 |
+
#endif
|
| 223 |
+
return p[n];
|
| 224 |
+
}
|
| 225 |
+
inline const T& operator[](int n) const
|
| 226 |
+
{
|
| 227 |
+
#ifndef NDEBUG
|
| 228 |
+
if(n<0 || n>maxWritten )
|
| 229 |
+
errorAccess(n);
|
| 230 |
+
#endif
|
| 231 |
+
return p[n];
|
| 232 |
+
}
|
| 233 |
+
inline const T& get(int n) const
|
| 234 |
+
{
|
| 235 |
+
#ifndef NDEBUG
|
| 236 |
+
if(n<0 || n>maxWritten )
|
| 237 |
+
errorAccess(n);
|
| 238 |
+
#endif
|
| 239 |
+
return p[n];
|
| 240 |
+
}
|
| 241 |
+
const T&top(int n=0) const
|
| 242 |
+
{return (*this)[maxWritten-n];}
|
| 243 |
+
T&top(int n=0)
|
| 244 |
+
{return (*this)[maxWritten-n];}
|
| 245 |
+
const T&back(int n=0) const
|
| 246 |
+
{return (*this)[maxWritten-n];}
|
| 247 |
+
T&back(int n=0)
|
| 248 |
+
{return (*this)[maxWritten-n];}
|
| 249 |
+
T&push_back(const T&x)
|
| 250 |
+
{
|
| 251 |
+
allowAccess(maxWritten+1);
|
| 252 |
+
(*this)[maxWritten]=x;
|
| 253 |
+
return top();
|
| 254 |
+
}
|
| 255 |
+
/*
|
| 256 |
+
bool writeTo(ostream&out) const
|
| 257 |
+
{
|
| 258 |
+
out << "Vector ";
|
| 259 |
+
out << size() << " ";
|
| 260 |
+
out << a << '\n';
|
| 261 |
+
for(int iv=0;iv<=maxWritten;iv++)
|
| 262 |
+
{
|
| 263 |
+
writeOb(out, (*this)[iv]);
|
| 264 |
+
out << '\n';
|
| 265 |
+
}
|
| 266 |
+
return 1;
|
| 267 |
+
}
|
| 268 |
+
*/
|
| 269 |
+
|
| 270 |
+
// Reads a vector from `in`.  Expected layout: the token "Vector", the element
// count, a value for `a`, then the elements (each read with readOb).
// Returns 1 on success, 0 if the stream is unusable or the header is malformed.
// NOTE(review): `a` is not declared in the visible part of the class --
// confirm it is a member.
bool readFrom(istream&in)
{
  if( !in )
    {
      cerr << "ERROR(Vector): file cannot be opened.\n";
      return 0;
    }
  string s;
  in >> s;
  if( !(s=="Vector") )
    {
      cerr << "ERROR(Vector): Vector!='"<<s<<"'\n";
      return 0;
    }
  int biggest=0;
  in >> biggest;
  in >> a;
  // Never resize from a count that failed to parse or is negative.
  if( !in || biggest<0 )
    {
      cerr << "ERROR(Vector): cannot read vector header.\n";
      return 0;
    }
  resize(biggest);
  const int count=int(size());
  for(int iv=0;iv<count;iv++)
    readOb(in, (*this)[iv]);
  return 1;
}
|
| 294 |
+
};
|
| 295 |
+
|
| 296 |
+
template<class T> bool operator==(const Vector<T> &x, const Vector<T> &y)
|
| 297 |
+
{
|
| 298 |
+
if( &x == &y )
|
| 299 |
+
return 1;
|
| 300 |
+
else
|
| 301 |
+
{
|
| 302 |
+
if( y.size()!=x.size() )
|
| 303 |
+
return 0;
|
| 304 |
+
else
|
| 305 |
+
{
|
| 306 |
+
for(unsigned int iii=0;iii<x.size();iii++)
|
| 307 |
+
if( !(x[iii]==y[iii]) )
|
| 308 |
+
return 0;
|
| 309 |
+
return 1;
|
| 310 |
+
}
|
| 311 |
+
}
|
| 312 |
+
}
|
| 313 |
+
template<class T> bool operator!=(const Vector<T> &x, const Vector<T> &y)
|
| 314 |
+
{
|
| 315 |
+
return !(x==y);
|
| 316 |
+
}
|
| 317 |
+
|
| 318 |
+
template<class T> bool operator<(const Vector<T> &x, const Vector<T> &y)
|
| 319 |
+
{
|
| 320 |
+
if( &x == &y )
|
| 321 |
+
return 0;
|
| 322 |
+
else
|
| 323 |
+
{
|
| 324 |
+
if( y.size()<x.size() )
|
| 325 |
+
return !(y<x);
|
| 326 |
+
for(int iii=0;iii<x.size();iii++)
|
| 327 |
+
{
|
| 328 |
+
assert( iii!=y.size() );
|
| 329 |
+
if( x[iii]<y[iii] )
|
| 330 |
+
return 1;
|
| 331 |
+
else if( y[iii]<x[iii] )
|
| 332 |
+
return 0;
|
| 333 |
+
}
|
| 334 |
+
return x.size()!=y.size();//??
|
| 335 |
+
}
|
| 336 |
+
}
|
| 337 |
+
|
| 338 |
+
|
| 339 |
+
// Reports an out-of-range access on both cerr and cout (index, maxWritten,
// realSize and buffer address), then asserts; unless DEBUG is defined the
// program is aborted as well.
template<class T> void Vector<T>:: errorAccess(int n) const
{
  ostream *const sinks[2]={ &cerr, &cout };
  for(int s=0;s<2;s++)
    *sinks[s] << "ERROR: Access to array element " << n
              << " (" << maxWritten << ", " << realSize << ", " << (void*)p << ")\n";
  assert(0);
#ifndef DEBUG
  abort();
#endif
}
|
| 350 |
+
|
| 351 |
+
// Writes the vector as "Vector(<size>){  <i>: <elem> ; ...}\n".
template<class T> ostream& operator<<(ostream&o, const Vector<T>&a)
{
  o << "Vector(" << a.size() << "){ ";
  unsigned int pos=0;
  while( pos<a.size() )
    {
      o << " " << pos << ": ";
      o << a[pos] << " ;";
      ++pos;
    }
  o << "}\n";
  return o;
}
|
| 358 |
+
|
| 359 |
+
// Placeholder extraction operator: consumes nothing and leaves the vector untouched.
template<class T> istream& operator>>(istream&in, Vector<T>&)
{
  return in;
}
|
| 361 |
+
|
| 362 |
+
template<class T> int Hash(const Vector<T>&a)
|
| 363 |
+
{
|
| 364 |
+
int n=0;
|
| 365 |
+
for(int iii=0;iii<a.size();iii++)
|
| 366 |
+
n+=Hash(a[iii])*(iii+1);
|
| 367 |
+
return n+a.size()*47;
|
| 368 |
+
}
|
| 369 |
+
// Assigns the first n elements of bb to aa, front to back (no-op for n<=0).
template<class T> void Vector<T>::copy(T *aa, const T *bb, int n)
{
  for(int left=n;left>0;--left)
    *aa++=*bb++;
}
|
| 374 |
+
// Assigns the first n elements of bb to aa, front to back (no-op for n<=0).
template<class T> void Vector<T>::copy(T *aa, T *bb, int n)
{
  for(int left=n;left>0;--left)
    *aa++=*bb++;
}
|
| 379 |
+
|
| 380 |
+
// Grows the buffer to 2*realSize+1 elements, carrying over the first
// (old) realSize elements.
// The new buffer is allocated and filled before p/realSize are touched, so a
// failing allocation leaves the vector unchanged.
// memo_new/memo_del are bookkeeping hooks defined elsewhere.
template<class T> void Vector<T>::_expand()
{
#ifdef VERY_ARRAY_DEBUG
  cout << "FREE ARRAY because of _expand: " << this << " " << realSize<<" "<<(void*)p << '\n';
#endif
  const int oldsize=realSize;
  const int newsize=oldsize*2+1;
  T *newp=new T[newsize];
  memo_new(newp);
  copy(newp, p, oldsize);
  // Commit only once the replacement buffer is complete.
  T *oldp=p;
  p=newp;
  realSize=newsize;
  delete [] oldp;
  memo_del(oldp, 1);
#ifdef VERY_ARRAY_DEBUG
  cout << "NEW ARRAY because of _expand: " << this << " " << realSize<<" "<<(void*)p << '\n';
#endif
}
|
| 397 |
+
|
| 398 |
+
// Index of the largest element according to operator< (the first one on
// ties), or -1 for an empty vector.
template<class T> int Vector<T>::findMax() const
{
  if( size()==0 )
    return -1;
  int best=0;
  int cand=1;
  while( cand<size() )
    {
      if( (*this)[best]<(*this)[cand] )
        best=cand;
      ++cand;
    }
  return best;
}
|
| 411 |
+
// Index of the smallest element according to operator< (the first one on
// ties), or -1 for an empty vector.
template<class T> int Vector<T>::findMin() const
{
  if( size()==0 )
    return -1;
  int best=0;
  int cand=1;
  while( cand<size() )
    {
      if( (*this)[cand]<(*this)[best] )
        best=cand;
      ++cand;
    }
  return best;
}
|
| 424 |
+
|
| 425 |
+
#endif
|
| 426 |
+
|
| 427 |
+
#endif
|
tools/giza-pp/GIZA++-v2/WordClasses.h
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
Copyright (C) 2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
| 4 |
+
|
| 5 |
+
This file is part of GIZA++ ( extension of GIZA ).
|
| 6 |
+
|
| 7 |
+
This program is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU General Public License
|
| 9 |
+
as published by the Free Software Foundation; either version 2
|
| 10 |
+
of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This program is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 15 |
+
GNU General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU General Public License
|
| 18 |
+
along with this program; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 20 |
+
USA.
|
| 21 |
+
|
| 22 |
+
*/
|
| 23 |
+
#ifndef WordClasses_h_DEFINED
|
| 24 |
+
#define WordClasses_h_DEFINED
|
| 25 |
+
#include <map>
|
| 26 |
+
#include <string>
|
| 27 |
+
#include <set>
|
| 28 |
+
|
| 29 |
+
class WordClasses
|
| 30 |
+
{
|
| 31 |
+
private:
|
| 32 |
+
map<string,string> Sw2c;
|
| 33 |
+
map<string,int> Sc2int;
|
| 34 |
+
Vector<string> Sint2c;
|
| 35 |
+
Vector<int> w2c;
|
| 36 |
+
unsigned int classes;
|
| 37 |
+
public:
|
| 38 |
+
WordClasses()
|
| 39 |
+
: classes(1)
|
| 40 |
+
{
|
| 41 |
+
Sint2c.push_back("0");
|
| 42 |
+
Sc2int["0"]=0;
|
| 43 |
+
}
|
| 44 |
+
template<class MAPPER> bool read(istream&in,const MAPPER&m)
|
| 45 |
+
{
|
| 46 |
+
string sline;
|
| 47 |
+
int maxword=0;
|
| 48 |
+
while(getline(in,sline))
|
| 49 |
+
{
|
| 50 |
+
string word,wclass;
|
| 51 |
+
//istringstream iline(sline.c_str());
|
| 52 |
+
istringstream iline(sline);
|
| 53 |
+
iline>>word>>wclass;
|
| 54 |
+
maxword=max(m(word),maxword);
|
| 55 |
+
assert(Sw2c.count(word)==0);
|
| 56 |
+
Sw2c[word]=wclass;
|
| 57 |
+
if( !Sc2int.count(wclass) )
|
| 58 |
+
{
|
| 59 |
+
Sc2int[wclass]=classes++;
|
| 60 |
+
Sint2c.push_back(wclass);
|
| 61 |
+
assert(classes==Sint2c.size());
|
| 62 |
+
}
|
| 63 |
+
}
|
| 64 |
+
w2c=Vector<int>(maxword+1,0);
|
| 65 |
+
for(map<string,string>::const_iterator i=Sw2c.begin();i!=Sw2c.end();++i)
|
| 66 |
+
w2c[m(i->first)]=Sc2int[i->second];
|
| 67 |
+
cout << "Read classes: #words: " << maxword << " " << " #classes: "<< classes <<endl;
|
| 68 |
+
return 1;
|
| 69 |
+
}
|
| 70 |
+
int getClass(int w)const
|
| 71 |
+
{
|
| 72 |
+
if(w>=0&&int(w)<int(w2c.size()) )
|
| 73 |
+
return w2c[w];
|
| 74 |
+
else
|
| 75 |
+
return 0;
|
| 76 |
+
}
|
| 77 |
+
int operator()(const string&x)const
|
| 78 |
+
{
|
| 79 |
+
if( Sc2int.count(x) )
|
| 80 |
+
return Sc2int.find(x)->second;
|
| 81 |
+
else
|
| 82 |
+
{
|
| 83 |
+
cerr << "WARNING: class " << x << " not found.\n";
|
| 84 |
+
return 0;
|
| 85 |
+
}
|
| 86 |
+
}
|
| 87 |
+
string classString(unsigned int cnr)const
|
| 88 |
+
{
|
| 89 |
+
if( cnr<Sint2c.size())
|
| 90 |
+
return Sint2c[cnr];
|
| 91 |
+
else
|
| 92 |
+
return string("0");
|
| 93 |
+
}
|
| 94 |
+
};
|
| 95 |
+
|
| 96 |
+
#endif
|
tools/giza-pp/GIZA++-v2/alignment.cpp
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
EGYPT Toolkit for Statistical Machine Translation
|
| 4 |
+
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
| 5 |
+
|
| 6 |
+
This program is free software; you can redistribute it and/or
|
| 7 |
+
modify it under the terms of the GNU General Public License
|
| 8 |
+
as published by the Free Software Foundation; either version 2
|
| 9 |
+
of the License, or (at your option) any later version.
|
| 10 |
+
|
| 11 |
+
This program is distributed in the hope that it will be useful,
|
| 12 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 13 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 14 |
+
GNU General Public License for more details.
|
| 15 |
+
|
| 16 |
+
You should have received a copy of the GNU General Public License
|
| 17 |
+
along with this program; if not, write to the Free Software
|
| 18 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 19 |
+
USA.
|
| 20 |
+
|
| 21 |
+
*/
|
| 22 |
+
/*--
|
| 23 |
+
alignment: 'checked' alignment representation with automatic calculation
|
| 24 |
+
of fertilities
|
| 25 |
+
Franz Josef Och (30/07/99)
|
| 26 |
+
--*/
|
| 27 |
+
#include "alignment.h"
|
| 28 |
+
|
| 29 |
+
// Prints an alignment as "AL(l:<l>,m:<m>)(a: a(1) .. a(m) )(fert: f(0) .. f(l) ) c:\n".
ostream&operator<<(ostream&out, const alignment&a)
{
  const int m=a.a.size()-1;
  const int l=a.f.size()-1;
  out << "AL(l:" << l << ",m:" << m << ")(a: ";
  int j=1;
  while( j<=m )
    {
      out << a(j) << ' ';
      ++j;
    }
  out << ")(fert: ";
  int i=0;
  while( i<=l )
    {
      out << a.fert(i) << ' ';
      ++i;
    }
  out << ") c:" << "\n";
  return out;
}
|
| 38 |
+
|
tools/giza-pp/GIZA++-v2/alignment.h
ADDED
|
@@ -0,0 +1,227 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
EGYPT Toolkit for Statistical Machine Translation
|
| 4 |
+
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
| 5 |
+
|
| 6 |
+
This program is free software; you can redistribute it and/or
|
| 7 |
+
modify it under the terms of the GNU General Public License
|
| 8 |
+
as published by the Free Software Foundation; either version 2
|
| 9 |
+
of the License, or (at your option) any later version.
|
| 10 |
+
|
| 11 |
+
This program is distributed in the hope that it will be useful,
|
| 12 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 13 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 14 |
+
GNU General Public License for more details.
|
| 15 |
+
|
| 16 |
+
You should have received a copy of the GNU General Public License
|
| 17 |
+
along with this program; if not, write to the Free Software
|
| 18 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 19 |
+
USA.
|
| 20 |
+
|
| 21 |
+
*/
|
| 22 |
+
/*--
|
| 23 |
+
alignment: 'checked' alignment representation with autom. calc. of fertilities
|
| 24 |
+
Franz Josef Och (30/07/99)
|
| 25 |
+
--*/
|
| 26 |
+
#ifndef alignment_h_fjo_defined
|
| 27 |
+
#define alignment_h_fjo_defined
|
| 28 |
+
#include "Vector.h"
|
| 29 |
+
#include <cassert>
|
| 30 |
+
#include "defs.h"
|
| 31 |
+
#include "myassert.h"
|
| 32 |
+
|
| 33 |
+
class al_struct
|
| 34 |
+
{
|
| 35 |
+
public:
|
| 36 |
+
al_struct()
|
| 37 |
+
: prev(0),next(0){}
|
| 38 |
+
PositionIndex prev,next;
|
| 39 |
+
};
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
class alignment
|
| 43 |
+
{
|
| 44 |
+
private:
|
| 45 |
+
Vector<PositionIndex> a;
|
| 46 |
+
Vector<PositionIndex> positionSum,f;
|
| 47 |
+
public:
|
| 48 |
+
Vector<PositionIndex> als_i;
|
| 49 |
+
Vector<al_struct> als_j;
|
| 50 |
+
PositionIndex l,m;
|
| 51 |
+
alignment()
|
| 52 |
+
{}
|
| 53 |
+
alignment(PositionIndex _l, PositionIndex _m)
|
| 54 |
+
: a(_m+1, (PositionIndex)0),
|
| 55 |
+
positionSum(_l+1, (PositionIndex)0), f(_l+1, (PositionIndex)0), als_i(_l+1,0),als_j(_m+1),l(_l), m(_m)
|
| 56 |
+
{
|
| 57 |
+
f[0]=m;
|
| 58 |
+
for(PositionIndex j=1;j<=m;j++)
|
| 59 |
+
{
|
| 60 |
+
if( j>1 )
|
| 61 |
+
als_j[j].prev= j-1;
|
| 62 |
+
if( j<m )
|
| 63 |
+
als_j[j].next= j+1;
|
| 64 |
+
}
|
| 65 |
+
als_i[0]=1;
|
| 66 |
+
}
|
| 67 |
+
PositionIndex get_l()const
|
| 68 |
+
{return l;}
|
| 69 |
+
PositionIndex get_m()const
|
| 70 |
+
{return m;}
|
| 71 |
+
void doMove(int i,int j)
|
| 72 |
+
{
|
| 73 |
+
set(j,i);
|
| 74 |
+
}
|
| 75 |
+
void doSwap(int j1,int j2)
|
| 76 |
+
{
|
| 77 |
+
int aj1=a[j1],aj2=a[j2];
|
| 78 |
+
set(j1,aj2);
|
| 79 |
+
set(j2,aj1);
|
| 80 |
+
}
|
| 81 |
+
void set(PositionIndex j, PositionIndex aj)
|
| 82 |
+
{
|
| 83 |
+
PositionIndex old_aj=a[j];
|
| 84 |
+
massert(j<a.size());massert(aj<f.size());
|
| 85 |
+
massert(old_aj<f.size());massert(f[old_aj]>0);
|
| 86 |
+
massert(j>0);
|
| 87 |
+
positionSum[old_aj]-=j;
|
| 88 |
+
// ausfuegen
|
| 89 |
+
PositionIndex prev=als_j[j].prev;
|
| 90 |
+
PositionIndex next=als_j[j].next;
|
| 91 |
+
if( next )
|
| 92 |
+
als_j[next].prev=prev;
|
| 93 |
+
if( prev )
|
| 94 |
+
als_j[prev].next=next;
|
| 95 |
+
else
|
| 96 |
+
als_i[old_aj]=next;
|
| 97 |
+
|
| 98 |
+
// neue Position suchen
|
| 99 |
+
PositionIndex lfd=als_i[aj],llfd=0;
|
| 100 |
+
while( lfd && lfd<j )
|
| 101 |
+
lfd = als_j[llfd=lfd].next;
|
| 102 |
+
|
| 103 |
+
// einfuegen
|
| 104 |
+
als_j[j].prev=llfd;
|
| 105 |
+
als_j[j].next=lfd;
|
| 106 |
+
if( llfd )
|
| 107 |
+
als_j[llfd].next=j;
|
| 108 |
+
else
|
| 109 |
+
als_i[aj]=j;
|
| 110 |
+
if( lfd )
|
| 111 |
+
als_j[lfd].prev=j;
|
| 112 |
+
|
| 113 |
+
f[old_aj]--;
|
| 114 |
+
positionSum[aj]+=j;
|
| 115 |
+
f[aj]++;
|
| 116 |
+
a[j]=aj;
|
| 117 |
+
}
|
| 118 |
+
const Vector<PositionIndex>& getAlignment() const
|
| 119 |
+
{return a ;}
|
| 120 |
+
PositionIndex get_al(PositionIndex j)const
|
| 121 |
+
{
|
| 122 |
+
massert(j<a.size());
|
| 123 |
+
return a[j];
|
| 124 |
+
}
|
| 125 |
+
PositionIndex operator()(PositionIndex j)const
|
| 126 |
+
{
|
| 127 |
+
massert(j<a.size());
|
| 128 |
+
return a[j];
|
| 129 |
+
}
|
| 130 |
+
PositionIndex fert(PositionIndex i)const
|
| 131 |
+
{
|
| 132 |
+
massert(i<f.size());
|
| 133 |
+
return f[i];
|
| 134 |
+
}
|
| 135 |
+
PositionIndex get_head(PositionIndex i)const
|
| 136 |
+
{
|
| 137 |
+
massert( als_i[i]==_get_head(i) );
|
| 138 |
+
return als_i[i];
|
| 139 |
+
}
|
| 140 |
+
PositionIndex get_center(PositionIndex i)const
|
| 141 |
+
{
|
| 142 |
+
if( i==0 )return 0;
|
| 143 |
+
massert(((positionSum[i]+f[i]-1)/f[i]==_get_center(i)));
|
| 144 |
+
return (positionSum[i]+f[i]-1)/f[i];
|
| 145 |
+
}
|
| 146 |
+
PositionIndex _get_head(PositionIndex i)const
|
| 147 |
+
{
|
| 148 |
+
if( fert(i)==0 )return 0;
|
| 149 |
+
for(PositionIndex j=1;j<=m;j++)
|
| 150 |
+
if( a[j]==i )
|
| 151 |
+
return j;
|
| 152 |
+
return 0;
|
| 153 |
+
}
|
| 154 |
+
PositionIndex _get_center(PositionIndex i)const
|
| 155 |
+
{
|
| 156 |
+
if( i==0 )return 0;
|
| 157 |
+
massert(fert(i));
|
| 158 |
+
PositionIndex sum=0;
|
| 159 |
+
for(PositionIndex j=1;j<=m;j++)
|
| 160 |
+
if( a[j]==i )
|
| 161 |
+
sum+=j;
|
| 162 |
+
return (sum+fert(i)-1)/fert(i);
|
| 163 |
+
}
|
| 164 |
+
PositionIndex prev_cept(PositionIndex i)const
|
| 165 |
+
{
|
| 166 |
+
if( i==0 )return 0;
|
| 167 |
+
PositionIndex k=i-1;
|
| 168 |
+
while(k&&fert(k)==0)
|
| 169 |
+
k--;
|
| 170 |
+
return k;
|
| 171 |
+
}
|
| 172 |
+
PositionIndex next_cept(PositionIndex i)const
|
| 173 |
+
{
|
| 174 |
+
PositionIndex k=i+1;
|
| 175 |
+
while(k<l+1&&fert(k)==0)
|
| 176 |
+
k++;
|
| 177 |
+
return k;
|
| 178 |
+
}
|
| 179 |
+
PositionIndex prev_in_cept(PositionIndex j)const
|
| 180 |
+
{
|
| 181 |
+
//PositionIndex k=j-1;
|
| 182 |
+
//while(k&&a[k]!=a[j])
|
| 183 |
+
//k--;
|
| 184 |
+
//assert( als_j[j].prev==k );
|
| 185 |
+
//assert(k);
|
| 186 |
+
//return k;
|
| 187 |
+
massert(als_j[j].prev==0||a[als_j[j].prev]==a[j]);
|
| 188 |
+
return als_j[j].prev;
|
| 189 |
+
}
|
| 190 |
+
friend ostream &operator<<(ostream&out, const alignment&a);
|
| 191 |
+
friend bool operator==(const alignment&a, const alignment&b)
|
| 192 |
+
{
|
| 193 |
+
massert(a.a.size()==b.a.size());
|
| 194 |
+
for(PositionIndex j=1;j<=a.get_m();j++)
|
| 195 |
+
if(a(j)!=b(j))
|
| 196 |
+
return 0;
|
| 197 |
+
return 1;
|
| 198 |
+
}
|
| 199 |
+
friend bool operator<(const alignment&x, const alignment&y)
|
| 200 |
+
{
|
| 201 |
+
massert(x.get_m()==y.get_m());
|
| 202 |
+
for(PositionIndex j=1;j<=x.get_m();j++)
|
| 203 |
+
if( x(j)<y(j) )
|
| 204 |
+
return 1;
|
| 205 |
+
else if( y(j)<x(j) )
|
| 206 |
+
return 0;
|
| 207 |
+
return 0;
|
| 208 |
+
}
|
| 209 |
+
friend int differences(const alignment&x, const alignment&y){
|
| 210 |
+
int count=0;
|
| 211 |
+
massert(x.get_m()==y.get_m());
|
| 212 |
+
for(PositionIndex j=1;j<=x.get_m();j++)
|
| 213 |
+
count += (x(j)!=y(j));
|
| 214 |
+
return count;
|
| 215 |
+
}
|
| 216 |
+
bool valid()const
|
| 217 |
+
{
|
| 218 |
+
if( 2*f[0]>m )
|
| 219 |
+
return 0;
|
| 220 |
+
for(unsigned int i=1;i<=l;i++)
|
| 221 |
+
if( f[i]>=MAX_FERTILITY )
|
| 222 |
+
return 0;
|
| 223 |
+
return 1;
|
| 224 |
+
}
|
| 225 |
+
friend class transpair_model5;
|
| 226 |
+
};
|
| 227 |
+
#endif
|
tools/giza-pp/GIZA++-v2/collCounts.cpp
ADDED
|
@@ -0,0 +1,293 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
| 4 |
+
|
| 5 |
+
This file is part of GIZA++ ( extension of GIZA ).
|
| 6 |
+
|
| 7 |
+
This program is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU General Public License
|
| 9 |
+
as published by the Free Software Foundation; either version 2
|
| 10 |
+
of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This program is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 15 |
+
GNU General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU General Public License
|
| 18 |
+
along with this program; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 20 |
+
USA.
|
| 21 |
+
|
| 22 |
+
*/
|
| 23 |
+
#include "alignment.h"
|
| 24 |
+
#include "transpair_model3.h"
|
| 25 |
+
#include <map>
|
| 26 |
+
#include "collCounts.h"
|
| 27 |
+
#include "MoveSwapMatrix.h"
|
| 28 |
+
#include "D5Tables.h"
|
| 29 |
+
#include "transpair_model5.h"
|
| 30 |
+
#include "transpair_modelhmm.h"
|
| 31 |
+
#include "Parameter.h"
|
| 32 |
+
|
| 33 |
+
extern float COUNTINCREASE_CUTOFF_AL;
|
| 34 |
+
// unifies collectCountsOverAlignments and findAlignmentNeighborhood FJO-20/07/99
|
| 35 |
+
template<class TRANSPAIR>
|
| 36 |
+
int collectCountsOverNeighborhood(const MoveSwapMatrix<TRANSPAIR>&msc,LogProb ascore,Array2<LogProb,Vector<LogProb> >&dtcount,Array2<LogProb,Vector<LogProb> >&ncount,LogProb&p1count,LogProb&p0count,LogProb&total_count)
|
| 37 |
+
{
|
| 38 |
+
int nAl=0;
|
| 39 |
+
const PositionIndex l=msc.get_l(),m=msc.get_m();
|
| 40 |
+
Array2<LogProb,Vector<LogProb> > cmove(l+1,m+1),cswap(l+1,m+1);
|
| 41 |
+
Vector<LogProb> negmove(m+1),negswap(m+1),plus1fert(l+1),minus1fert(l+1);
|
| 42 |
+
LogProb total_move,total_swap;
|
| 43 |
+
if( msc.isCenterDeleted()==0 )
|
| 44 |
+
{
|
| 45 |
+
total_move+=ascore;
|
| 46 |
+
nAl++;
|
| 47 |
+
}
|
| 48 |
+
for(PositionIndex j=1;j<=m;j++)
|
| 49 |
+
for(PositionIndex i=0;i<=l;i++)
|
| 50 |
+
if( msc(j)!=i && !msc.isDelMove(i,j) )
|
| 51 |
+
{
|
| 52 |
+
LogProb newscore=ascore*msc.cmove(i,j);
|
| 53 |
+
total_move+=newscore;
|
| 54 |
+
nAl++;
|
| 55 |
+
cmove(i,j)+=newscore;
|
| 56 |
+
negmove[j]+=newscore;
|
| 57 |
+
plus1fert[i]+=newscore;
|
| 58 |
+
minus1fert[msc(j)]+=newscore;
|
| 59 |
+
}
|
| 60 |
+
for(PositionIndex j1=1;j1<=m;j1++)
|
| 61 |
+
for(PositionIndex j2=j1+1;j2<=m;j2++)
|
| 62 |
+
if( msc(j1)!=msc(j2) && !msc.isDelSwap(j1,j2) )
|
| 63 |
+
{
|
| 64 |
+
LogProb newscore=ascore*msc.cswap(j1,j2);
|
| 65 |
+
total_swap+=newscore;
|
| 66 |
+
nAl++;
|
| 67 |
+
cswap(msc(j1),j2)+=newscore;
|
| 68 |
+
cswap(msc(j2),j1)+=newscore;
|
| 69 |
+
negswap[j1]+=newscore;
|
| 70 |
+
negswap[j2]+=newscore;
|
| 71 |
+
}
|
| 72 |
+
total_count+=total_move+total_swap;
|
| 73 |
+
for(PositionIndex j=1;j<=m;j++)
|
| 74 |
+
for(PositionIndex i=0;i<=l;i++)
|
| 75 |
+
dtcount(i,j) += ((i==msc(j)) ? (total_count-(negmove[j]+negswap[j])) : (cswap(i,j)+cmove(i,j)));
|
| 76 |
+
for(PositionIndex i=1;i<=l;i++)
|
| 77 |
+
{
|
| 78 |
+
LogProb temp=minus1fert[i]+plus1fert[i];
|
| 79 |
+
if( msc.fert(i)<MAX_FERTILITY )
|
| 80 |
+
ncount(i,msc.fert(i))+=total_count-temp;
|
| 81 |
+
if(msc.fert(i)>0&&msc.fert(i)-1<MAX_FERTILITY)
|
| 82 |
+
ncount(i,msc.fert(i)-1)+=minus1fert[i];
|
| 83 |
+
else
|
| 84 |
+
if( minus1fert[i]!=0.0 )
|
| 85 |
+
cerr << "ERROR: M1Fa: " << minus1fert[i] << ' ' << i << ' ' << msc.fert(i)<< endl;
|
| 86 |
+
if(msc.fert(i)+1<MAX_FERTILITY)
|
| 87 |
+
ncount(i,msc.fert(i)+1)+=plus1fert[i];
|
| 88 |
+
}
|
| 89 |
+
LogProb temp=minus1fert[0]+plus1fert[0];
|
| 90 |
+
p1count += (total_count-temp)*(LogProb)msc.fert(0);
|
| 91 |
+
p0count += (total_count-temp)*(LogProb)(m-2*msc.fert(0));
|
| 92 |
+
if( msc.fert(0)>0 )
|
| 93 |
+
{
|
| 94 |
+
p1count += (minus1fert[0])*(LogProb)(msc.fert(0)-1);
|
| 95 |
+
p0count += (minus1fert[0])*(LogProb)(m-2*(msc.fert(0)-1));
|
| 96 |
+
}
|
| 97 |
+
else
|
| 98 |
+
if( minus1fert[0]!=0.0 )
|
| 99 |
+
cerr << "ERROR: M1Fb: " << minus1fert[0] << endl;
|
| 100 |
+
if(int(m)-2*(int(msc.fert(0))+1)>=0)
|
| 101 |
+
{
|
| 102 |
+
p1count += (plus1fert[0])*(LogProb)(msc.fert(0)+1);
|
| 103 |
+
p0count += (plus1fert[0])*(LogProb)(m-2*(msc.fert(0)+1));
|
| 104 |
+
}
|
| 105 |
+
msc.check();
|
| 106 |
+
return nAl;
|
| 107 |
+
};
|
| 108 |
+
|
| 109 |
+
template<class TRANSPAIR>
|
| 110 |
+
double collectCountsOverNeighborhoodForSophisticatedModels(const MoveSwapMatrix<TRANSPAIR>&,LogProb,void*)
|
| 111 |
+
{
|
| 112 |
+
return 0.0;
|
| 113 |
+
}
|
| 114 |
+
|
| 115 |
+
template<class TRANSPAIR>
|
| 116 |
+
void _collectCountsOverNeighborhoodForSophisticatedModels(const MoveSwapMatrix<TRANSPAIR>&Mmsc,const alignment&msc,const TRANSPAIR&ef,LogProb normalized_ascore,d4model*d4Table)
|
| 117 |
+
{
|
| 118 |
+
Mmsc.check();
|
| 119 |
+
const PositionIndex m=msc.get_m(),l=msc.get_l();
|
| 120 |
+
for(PositionIndex j=1;j<=m;++j)
|
| 121 |
+
if( msc(j)!=0 )
|
| 122 |
+
if( msc.get_head(msc(j))==j)
|
| 123 |
+
{
|
| 124 |
+
int ep=msc.prev_cept(msc(j));
|
| 125 |
+
//massert( &d4Table->getCountRef_first(j,msc.get_center(ep),d4Table->ewordclasses.getClass(ef.get_es(ep)),d4Table->fwordclasses.getClass(ef.get_fs(j)),l,m) == ef.getCountFirst(ep,j,msc.get_center(ep)));
|
| 126 |
+
d4Table->getCountRef_first(j,msc.get_center(ep),d4Table->ewordclasses.getClass(ef.get_es(ep)),d4Table->fwordclasses.getClass(ef.get_fs(j)),l,m)+=normalized_ascore;
|
| 127 |
+
}
|
| 128 |
+
else
|
| 129 |
+
{
|
| 130 |
+
//massert( &d4Table->getCountRef_bigger(j,msc.prev_in_cept(j),0,d4Table->fwordclasses.getClass(ef.get_fs(j)),l,m) == ef.getCountSecond(j,msc.prev_in_cept(j) ));
|
| 131 |
+
d4Table->getCountRef_bigger(j,msc.prev_in_cept(j),0,d4Table->fwordclasses.getClass(ef.get_fs(j)),l,m)+=normalized_ascore;
|
| 132 |
+
}
|
| 133 |
+
}
|
| 134 |
+
|
| 135 |
+
template<class TRANSPAIR>
|
| 136 |
+
void _collectCountsOverNeighborhoodForSophisticatedModels(const MoveSwapMatrix<TRANSPAIR>&Mmsc,const alignment&msc,const TRANSPAIR&ef,LogProb normalized_ascore,d5model*d5Table)
|
| 137 |
+
{
|
| 138 |
+
Mmsc.check();
|
| 139 |
+
_collectCountsOverNeighborhoodForSophisticatedModels(Mmsc,msc,ef,normalized_ascore,&d5Table->d4m);
|
| 140 |
+
Mmsc.check();
|
| 141 |
+
const PositionIndex m=msc.get_m(),l=msc.get_l();
|
| 142 |
+
PositionIndex prev_cept=0;
|
| 143 |
+
PositionIndex vac_all=m;
|
| 144 |
+
Vector<char> vac(m+1,0);
|
| 145 |
+
for(PositionIndex i=1;i<=l;i++)
|
| 146 |
+
{
|
| 147 |
+
PositionIndex cur_j=msc.als_i[i];
|
| 148 |
+
PositionIndex prev_j=0;
|
| 149 |
+
PositionIndex k=0;
|
| 150 |
+
if(cur_j) { // process first word of cept
|
| 151 |
+
k++;
|
| 152 |
+
d5Table->getCountRef_first(vacancies(vac,cur_j),vacancies(vac,msc.get_center(prev_cept)),
|
| 153 |
+
d5Table->fwordclasses.getClass(ef.get_fs(cur_j)),l,m,vac_all-msc.fert(i)+k)+=normalized_ascore;
|
| 154 |
+
vac_all--;
|
| 155 |
+
assert(vac[cur_j]==0);
|
| 156 |
+
vac[cur_j]=1;
|
| 157 |
+
Mmsc.check();
|
| 158 |
+
prev_j=cur_j;
|
| 159 |
+
cur_j=msc.als_j[cur_j].next;
|
| 160 |
+
}
|
| 161 |
+
while(cur_j) { // process following words of cept
|
| 162 |
+
k++;
|
| 163 |
+
int vprev=vacancies(vac,prev_j);
|
| 164 |
+
d5Table->getCountRef_bigger(vacancies(vac,cur_j),vprev,d5Table->fwordclasses.getClass(ef.get_fs(cur_j)),l,m,vac_all-vprev/*war weg*/-msc.fert(i)+k)+=normalized_ascore;
|
| 165 |
+
vac_all--;
|
| 166 |
+
vac[cur_j]=1;
|
| 167 |
+
Mmsc.check();
|
| 168 |
+
prev_j=cur_j;
|
| 169 |
+
cur_j=msc.als_j[cur_j].next;
|
| 170 |
+
}
|
| 171 |
+
assert(k==msc.fert(i));
|
| 172 |
+
if( k )
|
| 173 |
+
prev_cept=i;
|
| 174 |
+
}
|
| 175 |
+
assert(vac_all==msc.fert(0));
|
| 176 |
+
}
|
| 177 |
+
|
| 178 |
+
extern int NumberOfAlignmentsInSophisticatedCountCollection;
|
| 179 |
+
|
| 180 |
+
template<class TRANSPAIR,class MODEL>
|
| 181 |
+
double collectCountsOverNeighborhoodForSophisticatedModels(const MoveSwapMatrix<TRANSPAIR>&msc,LogProb normalized_ascore,MODEL*d5Table)
|
| 182 |
+
{
|
| 183 |
+
const PositionIndex m=msc.get_m(),l=msc.get_l();
|
| 184 |
+
alignment x(msc);
|
| 185 |
+
double sum=0;
|
| 186 |
+
msc.check();
|
| 187 |
+
if( !msc.isCenterDeleted() )
|
| 188 |
+
{
|
| 189 |
+
_collectCountsOverNeighborhoodForSophisticatedModels<TRANSPAIR>(msc,x,msc.get_ef(),normalized_ascore,d5Table);
|
| 190 |
+
NumberOfAlignmentsInSophisticatedCountCollection++;
|
| 191 |
+
sum+=normalized_ascore;
|
| 192 |
+
}
|
| 193 |
+
msc.check();
|
| 194 |
+
for(WordIndex j=1;j<=m;j++)for(WordIndex i=0;i<=l;i++)
|
| 195 |
+
{
|
| 196 |
+
WordIndex old=x(j);
|
| 197 |
+
if( i!=old&& !msc.isDelMove(i,j) )
|
| 198 |
+
{
|
| 199 |
+
msc.check();
|
| 200 |
+
double c=msc.cmove(i,j)*normalized_ascore;
|
| 201 |
+
if(c > COUNTINCREASE_CUTOFF_AL )
|
| 202 |
+
{
|
| 203 |
+
x.set(j,i);
|
| 204 |
+
_collectCountsOverNeighborhoodForSophisticatedModels<TRANSPAIR>(msc,x,msc.get_ef(),c,d5Table);
|
| 205 |
+
NumberOfAlignmentsInSophisticatedCountCollection++;
|
| 206 |
+
x.set(j,old);
|
| 207 |
+
sum+=c;
|
| 208 |
+
}
|
| 209 |
+
msc.check();
|
| 210 |
+
}
|
| 211 |
+
}
|
| 212 |
+
for(PositionIndex j1=1;j1<=m;j1++)
|
| 213 |
+
for(PositionIndex j2=j1+1;j2<=m;j2++)
|
| 214 |
+
if( msc(j1)!=msc(j2) && !msc.isDelSwap(j1,j2) )
|
| 215 |
+
{
|
| 216 |
+
double c=msc.cswap(j1,j2)*normalized_ascore;
|
| 217 |
+
msc.check();
|
| 218 |
+
if(c > COUNTINCREASE_CUTOFF_AL )
|
| 219 |
+
{
|
| 220 |
+
int old1=msc(j1),old2=msc(j2);
|
| 221 |
+
x.set(j1,old2);
|
| 222 |
+
x.set(j2,old1);
|
| 223 |
+
_collectCountsOverNeighborhoodForSophisticatedModels<TRANSPAIR>(msc,x,msc.get_ef(),c,d5Table);
|
| 224 |
+
NumberOfAlignmentsInSophisticatedCountCollection++;
|
| 225 |
+
x.set(j1,old1);
|
| 226 |
+
x.set(j2,old2);
|
| 227 |
+
sum+=c;
|
| 228 |
+
}
|
| 229 |
+
msc.check();
|
| 230 |
+
}
|
| 231 |
+
msc.check();
|
| 232 |
+
return sum;
|
| 233 |
+
}
|
| 234 |
+
|
| 235 |
+
template<class TRANSPAIR,class MODEL>
|
| 236 |
+
int collectCountsOverNeighborhood(const Vector<pair<MoveSwapMatrix<TRANSPAIR>*,LogProb> >&smsc,Vector<WordIndex>&es,Vector<WordIndex>&fs,tmodel<COUNT,PROB>&tTable,amodel<COUNT>&aCountTable,amodel<COUNT>&dCountTable,nmodel<COUNT>&nCountTable,double&p1count,double&p0count,LogProb&_total,float count,bool addCounts,MODEL*d4Table)
|
| 237 |
+
{
|
| 238 |
+
int nAl=0;
|
| 239 |
+
const PositionIndex l=es.size()-1,m=fs.size()-1;
|
| 240 |
+
Array2<LogProb,Vector<LogProb> > dtcount(l+1,m+1),ncount(l+1,MAX_FERTILITY+1);
|
| 241 |
+
LogProb p0=0,p1=0,all_total=0;
|
| 242 |
+
for(unsigned int i=0;i<smsc.size();++i)
|
| 243 |
+
{
|
| 244 |
+
LogProb this_total=0;
|
| 245 |
+
nAl+=collectCountsOverNeighborhood(*smsc[i].first,smsc[i].second,dtcount,ncount,p1,p0,this_total);
|
| 246 |
+
all_total+=this_total;
|
| 247 |
+
}
|
| 248 |
+
_total=all_total;
|
| 249 |
+
all_total/=(double)count;
|
| 250 |
+
double sum2=0;
|
| 251 |
+
if( addCounts && d4Table )
|
| 252 |
+
{
|
| 253 |
+
for(unsigned int i=0;i<smsc.size();++i)
|
| 254 |
+
{
|
| 255 |
+
//for(WordIndex j=1;j<=m;j++)for(WordIndex ii=0;ii<=l;ii++)
|
| 256 |
+
// (*smsc[i].first).cmove(ii,j);
|
| 257 |
+
sum2+=collectCountsOverNeighborhoodForSophisticatedModels(*smsc[i].first,smsc[i].second/all_total,d4Table);
|
| 258 |
+
}
|
| 259 |
+
if(!(fabs(count-sum2)<0.05))
|
| 260 |
+
cerr << "WARNING: DIFFERENT SUMS: (" << count << ") (" << sum2 << ")\n";
|
| 261 |
+
}
|
| 262 |
+
if( addCounts )
|
| 263 |
+
{
|
| 264 |
+
for(PositionIndex i=0;i<=l;i++)
|
| 265 |
+
{
|
| 266 |
+
for(PositionIndex j=1;j<=m;j++)
|
| 267 |
+
{
|
| 268 |
+
LogProb ijadd=dtcount(i,j)/all_total;
|
| 269 |
+
if( ijadd>COUNTINCREASE_CUTOFF_AL )
|
| 270 |
+
{
|
| 271 |
+
tTable.incCount(es[i],fs[j],ijadd);
|
| 272 |
+
dCountTable.getRef(j,i,l,m)+=ijadd;
|
| 273 |
+
aCountTable.getRef(i,j,l,m)+=ijadd;
|
| 274 |
+
}
|
| 275 |
+
}
|
| 276 |
+
if( i>0 )
|
| 277 |
+
for(PositionIndex n=0;n<MAX_FERTILITY;n++)
|
| 278 |
+
nCountTable.getRef(es[i],n)+=ncount(i,n)/all_total;
|
| 279 |
+
}
|
| 280 |
+
p0count+=p0/all_total;
|
| 281 |
+
p1count+=p1/all_total;
|
| 282 |
+
}
|
| 283 |
+
return nAl;
|
| 284 |
+
}
|
| 285 |
+
|
| 286 |
+
|
| 287 |
+
|
| 288 |
+
|
| 289 |
+
|
| 290 |
+
|
| 291 |
+
|
| 292 |
+
|
| 293 |
+
|
tools/giza-pp/GIZA++-v2/collCounts.h
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
| 4 |
+
|
| 5 |
+
This file is part of GIZA++ ( extension of GIZA ).
|
| 6 |
+
|
| 7 |
+
This program is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU General Public License
|
| 9 |
+
as published by the Free Software Foundation; either version 2
|
| 10 |
+
of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This program is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 15 |
+
GNU General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU General Public License
|
| 18 |
+
along with this program; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 20 |
+
USA.
|
| 21 |
+
|
| 22 |
+
*/
|
| 23 |
+
#ifndef collCounts_h_defined
|
| 24 |
+
#define collCounts_h_defined
|
| 25 |
+
#include "alignment.h"
|
| 26 |
+
#include "transpair_model3.h"
|
| 27 |
+
#include <map>
|
| 28 |
+
#include "MoveSwapMatrix.h"
|
| 29 |
+
#include "D4Tables.h"
|
| 30 |
+
#include "transpair_model4.h"
|
| 31 |
+
|
| 32 |
+
// Plain record of three shorts: an operation kind (`type`) and two operands
// (`a`, `b`).  The class itself attaches no meaning to the values; that is
// up to the code that creates the records.
class OneMoveSwap
{
 public:
  short type;
  short a,b;
  // Builds a record with all three fields given explicitly.
  OneMoveSwap(short _type,short _a,short _b)
    : type(_type),a(_a),b(_b)
  {}
  // Default record: every field is zero.  `a` and `b` must be initialised
  // here as well, because the comparison operators for this class read them
  // and reading an indeterminate short is undefined behaviour.
  OneMoveSwap()
    : type(0),a(0),b(0)
  {}
};
|
| 43 |
+
|
| 44 |
+
inline bool operator<(const OneMoveSwap&a,const OneMoveSwap&b)
|
| 45 |
+
{
|
| 46 |
+
if(a.type<b.type)return 1;
|
| 47 |
+
else if(b.type<a.type)return 0;
|
| 48 |
+
else if(a.a<b.a)return 1;
|
| 49 |
+
else if(b.a<a.a)return 0;
|
| 50 |
+
else return a.b<b.b;
|
| 51 |
+
}
|
| 52 |
+
|
| 53 |
+
inline bool operator==(const OneMoveSwap&a,const OneMoveSwap&b)
|
| 54 |
+
{
|
| 55 |
+
return a.type==b.type&&a.a==b.a&&a.b==b.b;
|
| 56 |
+
}
|
| 57 |
+
|
| 58 |
+
// Prints a OneMoveSwap as "(type,a,b)" and returns the stream.
inline ostream&operator<<(ostream&out,const OneMoveSwap&o)
{
  out << '(' << o.type;
  out << "," << o.a;
  out << "," << o.b;
  out << ")";
  return out;
}
|
| 62 |
+
|
| 63 |
+
// Prints every element of the set, in the set's iteration order, each
// followed by a single space, and returns the stream.
// The elements go to `out` (the stream the caller supplied), not to cout.
inline ostream &operator<<(ostream &out,const set<OneMoveSwap>&s)
{
  for(set<OneMoveSwap>::const_iterator i=s.begin();i!=s.end();++i)
    out << *i << ' ';
  return out;
}
|
| 69 |
+
|
| 70 |
+
bool makeOneMoveSwap(const alignment&a,const alignment&b,set<OneMoveSwap>&oms);
|
| 71 |
+
|
| 72 |
+
template<class TRANSPAIR,class MODEL>
|
| 73 |
+
int collectCountsOverNeighborhood(const Vector<pair<MoveSwapMatrix<TRANSPAIR>*,LogProb> >&smsc,
|
| 74 |
+
Vector<WordIndex>&es,
|
| 75 |
+
Vector<WordIndex>&fs,tmodel<COUNT,PROB>&tTable,
|
| 76 |
+
amodel<COUNT>&aCountTable,amodel<COUNT>&dCountTable,
|
| 77 |
+
nmodel<COUNT>&nCountTable,double&p1count,double&p0count,
|
| 78 |
+
LogProb&_total,float count,bool addCounts,MODEL*d4Table=0);
|
| 79 |
+
|
| 80 |
+
#endif
|
tools/giza-pp/GIZA++-v2/defs.h
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
EGYPT Toolkit for Statistical Machine Translation
|
| 4 |
+
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
| 5 |
+
|
| 6 |
+
This program is free software; you can redistribute it and/or
|
| 7 |
+
modify it under the terms of the GNU General Public License
|
| 8 |
+
as published by the Free Software Foundation; either version 2
|
| 9 |
+
of the License, or (at your option) any later version.
|
| 10 |
+
|
| 11 |
+
This program is distributed in the hope that it will be useful,
|
| 12 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 13 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 14 |
+
GNU General Public License for more details.
|
| 15 |
+
|
| 16 |
+
You should have received a copy of the GNU General Public License
|
| 17 |
+
along with this program; if not, write to the Free Software
|
| 18 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 19 |
+
USA.
|
| 20 |
+
|
| 21 |
+
*/
|
| 22 |
+
#ifndef _defs_h
|
| 23 |
+
#define _defs_h 1
|
| 24 |
+
#include <string>
|
| 25 |
+
#include <math.h>
|
| 26 |
+
#include <limits.h>
|
| 27 |
+
|
| 28 |
+
const int TRANSFER_SIMPLE=1;
|
| 29 |
+
const int TRANSFER=0;
|
| 30 |
+
|
| 31 |
+
const unsigned int MAX_SENTENCE_LENGTH_ALLOWED=101;
|
| 32 |
+
const int TRAIN_BUFFER_SIZE= 50000;
|
| 33 |
+
//#ifdef WORDINDEX_WITH_4_BYTE
|
| 34 |
+
typedef unsigned int WordIndex;
|
| 35 |
+
const unsigned int MAX_VOCAB_SIZE=UINT_MAX;
|
| 36 |
+
typedef unsigned int PositionIndex;
|
| 37 |
+
//#else
|
| 38 |
+
//typedef unsigned short WordIndex;
|
| 39 |
+
//const unsigned int MAX_VOCAB_SIZE=USHRT_MAX;
|
| 40 |
+
//typedef unsigned short PositionIndex;
|
| 41 |
+
//#endif
|
| 42 |
+
extern WordIndex MAX_FERTILITY;
|
| 43 |
+
|
| 44 |
+
const int MAX_W=457979;
|
| 45 |
+
extern double LAMBDA; // Lambda that is used to scale cross_entropy factor
|
| 46 |
+
|
| 47 |
+
typedef float PROB ;
|
| 48 |
+
typedef float COUNT ;
|
| 49 |
+
|
| 50 |
+
class LogProb {
|
| 51 |
+
private:
|
| 52 |
+
double x ;
|
| 53 |
+
public:
|
| 54 |
+
LogProb():x(0){}
|
| 55 |
+
LogProb(double y):x(y){}
|
| 56 |
+
LogProb(float y):x(y){}
|
| 57 |
+
LogProb(int y):x(y){}
|
| 58 |
+
LogProb(WordIndex y):x(y){}
|
| 59 |
+
operator double() const {return x;}
|
| 60 |
+
LogProb operator *= (double y) { x *= y ; return *this;}
|
| 61 |
+
LogProb operator *= (LogProb y) { x *= y.x ; return *this;}
|
| 62 |
+
LogProb operator /= (double y) { x /= y ; return *this;}
|
| 63 |
+
LogProb operator /= (LogProb y) { x /= y.x ; return *this;}
|
| 64 |
+
LogProb operator += (double y) { x += y ; return *this;}
|
| 65 |
+
LogProb operator += (LogProb y) { x += y.x ; return *this;}
|
| 66 |
+
};
|
| 67 |
+
|
| 68 |
+
const int PARLEV_ITER=1;
|
| 69 |
+
const int PARLEV_OPTHEUR=2;
|
| 70 |
+
const int PARLEV_OUTPUT=3;
|
| 71 |
+
const int PARLEV_SMOOTH=4;
|
| 72 |
+
const int PARLEV_EM=5;
|
| 73 |
+
const int PARLEV_MODELS=6;
|
| 74 |
+
const int PARLEV_SPECIAL=7;
|
| 75 |
+
const int PARLEV_INPUT=8;
|
| 76 |
+
|
| 77 |
+
#endif
|
| 78 |
+
|
tools/giza-pp/GIZA++-v2/dependencies
ADDED
|
@@ -0,0 +1,635 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#Automatically generated dependency list
|
| 2 |
+
optimized/alignment.o: alignment.cpp alignment.h Vector.h mystl.h myassert.h \
|
| 3 |
+
mymath.h Array2.h defs.h
|
| 4 |
+
optimized/AlignTables.o: AlignTables.cpp AlignTables.h defs.h Vector.h mystl.h \
|
| 5 |
+
myassert.h mymath.h Array2.h transpair_model1.h NTables.h vocab.h \
|
| 6 |
+
ATables.h Array4.h TTables.h Globals.h alignment.h
|
| 7 |
+
optimized/ATables.o: ATables.cpp ATables.h defs.h Vector.h mystl.h myassert.h \
|
| 8 |
+
mymath.h Array2.h Array4.h Globals.h Parameter.h Pointer.h
|
| 9 |
+
optimized/collCounts.o: collCounts.cpp alignment.h Vector.h mystl.h myassert.h \
|
| 10 |
+
mymath.h Array2.h defs.h transpair_model3.h NTables.h vocab.h ATables.h \
|
| 11 |
+
Array4.h TTables.h Globals.h transpair_model2.h transpair_model1.h \
|
| 12 |
+
collCounts.h MoveSwapMatrix.h D4Tables.h WordClasses.h \
|
| 13 |
+
transpair_model4.h D5Tables.h transpair_model5.h transpair_modelhmm.h \
|
| 14 |
+
ForwardBackward.h Array.h hmm.h getSentence.h model2.h model1.h \
|
| 15 |
+
Perplexity.h Dictionary.h HMMTables.h FlexArray.h Parameter.h Pointer.h
|
| 16 |
+
optimized/Dictionary.o: Dictionary.cpp Dictionary.h Vector.h mystl.h myassert.h \
|
| 17 |
+
mymath.h Array2.h
|
| 18 |
+
optimized/ForwardBackward.o: ForwardBackward.cpp ForwardBackward.h myassert.h \
|
| 19 |
+
Array.h Vector.h mystl.h mymath.h Array2.h Globals.h defs.h HMMTables.h \
|
| 20 |
+
FlexArray.h
|
| 21 |
+
optimized/getSentence.o: getSentence.cpp getSentence.h Vector.h mystl.h myassert.h \
|
| 22 |
+
mymath.h Array2.h defs.h vocab.h Globals.h Parameter.h Pointer.h
|
| 23 |
+
optimized/hmm.o: hmm.cpp hmm.h Vector.h mystl.h myassert.h mymath.h Array2.h \
|
| 24 |
+
TTables.h defs.h vocab.h Globals.h ATables.h Array4.h getSentence.h \
|
| 25 |
+
model2.h model1.h Perplexity.h Dictionary.h WordClasses.h HMMTables.h \
|
| 26 |
+
FlexArray.h Array.h ForwardBackward.h utility.h Parameter.h Pointer.h \
|
| 27 |
+
HMMTables.cpp
|
| 28 |
+
optimized/HMMTables.o: HMMTables.cpp HMMTables.h FlexArray.h Array.h Vector.h \
|
| 29 |
+
mystl.h myassert.h mymath.h Array2.h Globals.h defs.h Parameter.h \
|
| 30 |
+
Pointer.h
|
| 31 |
+
optimized/logprob.o: logprob.cpp logprob.h
|
| 32 |
+
optimized/main.o: main.cpp getSentence.h Vector.h mystl.h myassert.h mymath.h \
|
| 33 |
+
Array2.h defs.h vocab.h Globals.h TTables.h model1.h Perplexity.h \
|
| 34 |
+
Dictionary.h model2.h ATables.h Array4.h model3.h MoveSwapMatrix.h \
|
| 35 |
+
alignment.h transpair_model3.h NTables.h transpair_model2.h \
|
| 36 |
+
transpair_model1.h transpair_modelhmm.h ForwardBackward.h Array.h hmm.h \
|
| 37 |
+
WordClasses.h HMMTables.h FlexArray.h D4Tables.h AlignTables.h \
|
| 38 |
+
file_spec.h utility.h Parameter.h Pointer.h D5Tables.h \
|
| 39 |
+
transpair_model4.h transpair_model5.h
|
| 40 |
+
optimized/model1.o: model1.cpp model1.h Vector.h mystl.h myassert.h mymath.h \
|
| 41 |
+
Array2.h vocab.h defs.h TTables.h Globals.h getSentence.h Perplexity.h \
|
| 42 |
+
Dictionary.h utility.h Parameter.h Pointer.h
|
| 43 |
+
optimized/model2.o: model2.cpp model2.h Vector.h mystl.h myassert.h mymath.h \
|
| 44 |
+
Array2.h TTables.h defs.h vocab.h Globals.h ATables.h Array4.h \
|
| 45 |
+
getSentence.h model1.h Perplexity.h Dictionary.h utility.h Parameter.h \
|
| 46 |
+
Pointer.h
|
| 47 |
+
optimized/model2to3.o: model2to3.cpp model3.h Vector.h mystl.h myassert.h mymath.h \
|
| 48 |
+
Array2.h MoveSwapMatrix.h alignment.h defs.h transpair_model3.h \
|
| 49 |
+
NTables.h vocab.h ATables.h Array4.h TTables.h Globals.h \
|
| 50 |
+
transpair_model2.h transpair_model1.h getSentence.h model2.h model1.h \
|
| 51 |
+
Perplexity.h Dictionary.h transpair_modelhmm.h ForwardBackward.h \
|
| 52 |
+
Array.h hmm.h WordClasses.h HMMTables.h FlexArray.h D4Tables.h \
|
| 53 |
+
AlignTables.h utility.h
|
| 54 |
+
optimized/model345-peg.o: model345-peg.cpp model3.h Vector.h mystl.h myassert.h \
|
| 55 |
+
mymath.h Array2.h MoveSwapMatrix.h alignment.h defs.h \
|
| 56 |
+
transpair_model3.h NTables.h vocab.h ATables.h Array4.h TTables.h \
|
| 57 |
+
Globals.h transpair_model2.h transpair_model1.h getSentence.h model2.h \
|
| 58 |
+
model1.h Perplexity.h Dictionary.h transpair_modelhmm.h \
|
| 59 |
+
ForwardBackward.h Array.h hmm.h WordClasses.h HMMTables.h FlexArray.h \
|
| 60 |
+
D4Tables.h AlignTables.h collCounts.h transpair_model4.h
|
| 61 |
+
optimized/model3.o: model3.cpp model3.h Vector.h mystl.h myassert.h mymath.h \
|
| 62 |
+
Array2.h MoveSwapMatrix.h alignment.h defs.h transpair_model3.h \
|
| 63 |
+
NTables.h vocab.h ATables.h Array4.h TTables.h Globals.h \
|
| 64 |
+
transpair_model2.h transpair_model1.h getSentence.h model2.h model1.h \
|
| 65 |
+
Perplexity.h Dictionary.h transpair_modelhmm.h ForwardBackward.h \
|
| 66 |
+
Array.h hmm.h WordClasses.h HMMTables.h FlexArray.h D4Tables.h \
|
| 67 |
+
AlignTables.h collCounts.h transpair_model4.h utility.h D5Tables.h \
|
| 68 |
+
transpair_model5.h Parameter.h Pointer.h
|
| 69 |
+
optimized/model3_viterbi.o: model3_viterbi.cpp model3.h Vector.h mystl.h myassert.h \
|
| 70 |
+
mymath.h Array2.h MoveSwapMatrix.h alignment.h defs.h \
|
| 71 |
+
transpair_model3.h NTables.h vocab.h ATables.h Array4.h TTables.h \
|
| 72 |
+
Globals.h transpair_model2.h transpair_model1.h getSentence.h model2.h \
|
| 73 |
+
model1.h Perplexity.h Dictionary.h transpair_modelhmm.h \
|
| 74 |
+
ForwardBackward.h Array.h hmm.h WordClasses.h HMMTables.h FlexArray.h \
|
| 75 |
+
D4Tables.h AlignTables.h utility.h
|
| 76 |
+
optimized/model3_viterbi_with_tricks.o: model3_viterbi_with_tricks.cpp mystl.h \
|
| 77 |
+
myassert.h mymath.h Array2.h model3.h Vector.h MoveSwapMatrix.h \
|
| 78 |
+
alignment.h defs.h transpair_model3.h NTables.h vocab.h ATables.h \
|
| 79 |
+
Array4.h TTables.h Globals.h transpair_model2.h transpair_model1.h \
|
| 80 |
+
getSentence.h model2.h model1.h Perplexity.h Dictionary.h \
|
| 81 |
+
transpair_modelhmm.h ForwardBackward.h Array.h hmm.h WordClasses.h \
|
| 82 |
+
HMMTables.h FlexArray.h D4Tables.h AlignTables.h collCounts.h \
|
| 83 |
+
transpair_model4.h utility.h D5Tables.h transpair_model5.h Parameter.h \
|
| 84 |
+
Pointer.h collCounts.cpp
|
| 85 |
+
optimized/MoveSwapMatrix.o: MoveSwapMatrix.cpp MoveSwapMatrix.h alignment.h Vector.h \
|
| 86 |
+
mystl.h myassert.h mymath.h Array2.h defs.h transpair_model3.h \
|
| 87 |
+
NTables.h vocab.h ATables.h Array4.h TTables.h Globals.h \
|
| 88 |
+
transpair_model2.h transpair_model1.h transpair_model4.h D4Tables.h \
|
| 89 |
+
WordClasses.h transpair_model5.h D5Tables.h transpair_modelhmm.h \
|
| 90 |
+
ForwardBackward.h Array.h hmm.h getSentence.h model2.h model1.h \
|
| 91 |
+
Perplexity.h Dictionary.h HMMTables.h FlexArray.h
|
| 92 |
+
optimized/myassert.o: myassert.cpp mystl.h myassert.h mymath.h Array2.h
|
| 93 |
+
optimized/NTables.o: NTables.cpp NTables.h Array2.h mystl.h myassert.h mymath.h \
|
| 94 |
+
Vector.h defs.h vocab.h Parameter.h Pointer.h Globals.h
|
| 95 |
+
optimized/Parameter.o: Parameter.cpp Parameter.h mystl.h myassert.h mymath.h \
|
| 96 |
+
Array2.h Pointer.h Globals.h defs.h Vector.h
|
| 97 |
+
optimized/parse.o: parse.cpp defs.h utility.h Perplexity.h Vector.h mystl.h \
|
| 98 |
+
myassert.h mymath.h Array2.h Globals.h TTables.h vocab.h getSentence.h \
|
| 99 |
+
D4Tables.h WordClasses.h D5Tables.h ATables.h Array4.h Parameter.h \
|
| 100 |
+
Pointer.h
|
| 101 |
+
optimized/Perplexity.o: Perplexity.cpp Perplexity.h Vector.h mystl.h myassert.h \
|
| 102 |
+
mymath.h Array2.h defs.h Globals.h
|
| 103 |
+
optimized/plain2snt.o: plain2snt.cpp
|
| 104 |
+
optimized/reports.o: reports.cpp defs.h vocab.h Vector.h mystl.h myassert.h mymath.h \
|
| 105 |
+
Array2.h Perplexity.h Globals.h getSentence.h TTables.h Parameter.h \
|
| 106 |
+
Pointer.h
|
| 107 |
+
optimized/snt2cooc.o: snt2cooc.cpp
|
| 108 |
+
optimized/snt2plain.o: snt2plain.cpp
|
| 109 |
+
optimized/transpair_model3.o: transpair_model3.cpp transpair_model3.h Array2.h \
|
| 110 |
+
mystl.h myassert.h mymath.h defs.h Vector.h NTables.h vocab.h ATables.h \
|
| 111 |
+
Array4.h TTables.h Globals.h alignment.h transpair_model2.h \
|
| 112 |
+
transpair_model1.h
|
| 113 |
+
optimized/transpair_model4.o: transpair_model4.cpp transpair_model4.h Array2.h \
|
| 114 |
+
mystl.h myassert.h mymath.h defs.h Vector.h NTables.h vocab.h ATables.h \
|
| 115 |
+
Array4.h TTables.h Globals.h alignment.h D4Tables.h WordClasses.h \
|
| 116 |
+
transpair_model3.h transpair_model2.h transpair_model1.h Parameter.h \
|
| 117 |
+
Pointer.h
|
| 118 |
+
optimized/transpair_model5.o: transpair_model5.cpp transpair_model5.h Array2.h \
|
| 119 |
+
mystl.h myassert.h mymath.h defs.h Vector.h NTables.h vocab.h ATables.h \
|
| 120 |
+
Array4.h TTables.h Globals.h alignment.h D5Tables.h D4Tables.h \
|
| 121 |
+
WordClasses.h transpair_model4.h transpair_model3.h transpair_model2.h \
|
| 122 |
+
transpair_model1.h Parameter.h Pointer.h
|
| 123 |
+
optimized/TTables.o: TTables.cpp TTables.h defs.h vocab.h Vector.h mystl.h \
|
| 124 |
+
myassert.h mymath.h Array2.h Globals.h Parameter.h Pointer.h
|
| 125 |
+
optimized/utility.o: utility.cpp mymath.h
|
| 126 |
+
optimized/vocab.o: vocab.cpp vocab.h defs.h Vector.h mystl.h myassert.h mymath.h \
|
| 127 |
+
Array2.h
|
| 128 |
+
#Automatically generated dependency list
|
| 129 |
+
debug/alignment.o: alignment.cpp alignment.h Vector.h mystl.h myassert.h \
|
| 130 |
+
mymath.h Array2.h defs.h
|
| 131 |
+
debug/AlignTables.o: AlignTables.cpp AlignTables.h defs.h Vector.h mystl.h \
|
| 132 |
+
myassert.h mymath.h Array2.h transpair_model1.h NTables.h vocab.h \
|
| 133 |
+
ATables.h Array4.h TTables.h Globals.h alignment.h
|
| 134 |
+
debug/ATables.o: ATables.cpp ATables.h defs.h Vector.h mystl.h myassert.h \
|
| 135 |
+
mymath.h Array2.h Array4.h Globals.h Parameter.h Pointer.h
|
| 136 |
+
debug/collCounts.o: collCounts.cpp alignment.h Vector.h mystl.h myassert.h \
|
| 137 |
+
mymath.h Array2.h defs.h transpair_model3.h NTables.h vocab.h ATables.h \
|
| 138 |
+
Array4.h TTables.h Globals.h transpair_model2.h transpair_model1.h \
|
| 139 |
+
collCounts.h MoveSwapMatrix.h D4Tables.h WordClasses.h \
|
| 140 |
+
transpair_model4.h D5Tables.h transpair_model5.h transpair_modelhmm.h \
|
| 141 |
+
ForwardBackward.h Array.h hmm.h getSentence.h model2.h model1.h \
|
| 142 |
+
Perplexity.h Dictionary.h HMMTables.h FlexArray.h Parameter.h Pointer.h
|
| 143 |
+
debug/Dictionary.o: Dictionary.cpp Dictionary.h Vector.h mystl.h myassert.h \
|
| 144 |
+
mymath.h Array2.h
|
| 145 |
+
debug/ForwardBackward.o: ForwardBackward.cpp ForwardBackward.h myassert.h \
|
| 146 |
+
Array.h Vector.h mystl.h mymath.h Array2.h Globals.h defs.h HMMTables.h \
|
| 147 |
+
FlexArray.h
|
| 148 |
+
debug/getSentence.o: getSentence.cpp getSentence.h Vector.h mystl.h myassert.h \
|
| 149 |
+
mymath.h Array2.h defs.h vocab.h Globals.h Parameter.h Pointer.h
|
| 150 |
+
debug/hmm.o: hmm.cpp hmm.h Vector.h mystl.h myassert.h mymath.h Array2.h \
|
| 151 |
+
TTables.h defs.h vocab.h Globals.h ATables.h Array4.h getSentence.h \
|
| 152 |
+
model2.h model1.h Perplexity.h Dictionary.h WordClasses.h HMMTables.h \
|
| 153 |
+
FlexArray.h Array.h ForwardBackward.h utility.h Parameter.h Pointer.h \
|
| 154 |
+
HMMTables.cpp
|
| 155 |
+
debug/HMMTables.o: HMMTables.cpp HMMTables.h FlexArray.h Array.h Vector.h \
|
| 156 |
+
mystl.h myassert.h mymath.h Array2.h Globals.h defs.h Parameter.h \
|
| 157 |
+
Pointer.h
|
| 158 |
+
debug/logprob.o: logprob.cpp logprob.h
|
| 159 |
+
debug/main.o: main.cpp getSentence.h Vector.h mystl.h myassert.h mymath.h \
|
| 160 |
+
Array2.h defs.h vocab.h Globals.h TTables.h model1.h Perplexity.h \
|
| 161 |
+
Dictionary.h model2.h ATables.h Array4.h model3.h MoveSwapMatrix.h \
|
| 162 |
+
alignment.h transpair_model3.h NTables.h transpair_model2.h \
|
| 163 |
+
transpair_model1.h transpair_modelhmm.h ForwardBackward.h Array.h hmm.h \
|
| 164 |
+
WordClasses.h HMMTables.h FlexArray.h D4Tables.h AlignTables.h \
|
| 165 |
+
file_spec.h utility.h Parameter.h Pointer.h D5Tables.h \
|
| 166 |
+
transpair_model4.h transpair_model5.h
|
| 167 |
+
debug/model1.o: model1.cpp model1.h Vector.h mystl.h myassert.h mymath.h \
|
| 168 |
+
Array2.h vocab.h defs.h TTables.h Globals.h getSentence.h Perplexity.h \
|
| 169 |
+
Dictionary.h utility.h Parameter.h Pointer.h
|
| 170 |
+
debug/model2.o: model2.cpp model2.h Vector.h mystl.h myassert.h mymath.h \
|
| 171 |
+
Array2.h TTables.h defs.h vocab.h Globals.h ATables.h Array4.h \
|
| 172 |
+
getSentence.h model1.h Perplexity.h Dictionary.h utility.h Parameter.h \
|
| 173 |
+
Pointer.h
|
| 174 |
+
debug/model2to3.o: model2to3.cpp model3.h Vector.h mystl.h myassert.h mymath.h \
|
| 175 |
+
Array2.h MoveSwapMatrix.h alignment.h defs.h transpair_model3.h \
|
| 176 |
+
NTables.h vocab.h ATables.h Array4.h TTables.h Globals.h \
|
| 177 |
+
transpair_model2.h transpair_model1.h getSentence.h model2.h model1.h \
|
| 178 |
+
Perplexity.h Dictionary.h transpair_modelhmm.h ForwardBackward.h \
|
| 179 |
+
Array.h hmm.h WordClasses.h HMMTables.h FlexArray.h D4Tables.h \
|
| 180 |
+
AlignTables.h utility.h
|
| 181 |
+
debug/model345-peg.o: model345-peg.cpp model3.h Vector.h mystl.h myassert.h \
|
| 182 |
+
mymath.h Array2.h MoveSwapMatrix.h alignment.h defs.h \
|
| 183 |
+
transpair_model3.h NTables.h vocab.h ATables.h Array4.h TTables.h \
|
| 184 |
+
Globals.h transpair_model2.h transpair_model1.h getSentence.h model2.h \
|
| 185 |
+
model1.h Perplexity.h Dictionary.h transpair_modelhmm.h \
|
| 186 |
+
ForwardBackward.h Array.h hmm.h WordClasses.h HMMTables.h FlexArray.h \
|
| 187 |
+
D4Tables.h AlignTables.h collCounts.h transpair_model4.h
|
| 188 |
+
debug/model3.o: model3.cpp model3.h Vector.h mystl.h myassert.h mymath.h \
|
| 189 |
+
Array2.h MoveSwapMatrix.h alignment.h defs.h transpair_model3.h \
|
| 190 |
+
NTables.h vocab.h ATables.h Array4.h TTables.h Globals.h \
|
| 191 |
+
transpair_model2.h transpair_model1.h getSentence.h model2.h model1.h \
|
| 192 |
+
Perplexity.h Dictionary.h transpair_modelhmm.h ForwardBackward.h \
|
| 193 |
+
Array.h hmm.h WordClasses.h HMMTables.h FlexArray.h D4Tables.h \
|
| 194 |
+
AlignTables.h collCounts.h transpair_model4.h utility.h D5Tables.h \
|
| 195 |
+
transpair_model5.h Parameter.h Pointer.h
|
| 196 |
+
debug/model3_viterbi.o: model3_viterbi.cpp model3.h Vector.h mystl.h myassert.h \
|
| 197 |
+
mymath.h Array2.h MoveSwapMatrix.h alignment.h defs.h \
|
| 198 |
+
transpair_model3.h NTables.h vocab.h ATables.h Array4.h TTables.h \
|
| 199 |
+
Globals.h transpair_model2.h transpair_model1.h getSentence.h model2.h \
|
| 200 |
+
model1.h Perplexity.h Dictionary.h transpair_modelhmm.h \
|
| 201 |
+
ForwardBackward.h Array.h hmm.h WordClasses.h HMMTables.h FlexArray.h \
|
| 202 |
+
D4Tables.h AlignTables.h utility.h
|
| 203 |
+
debug/model3_viterbi_with_tricks.o: model3_viterbi_with_tricks.cpp mystl.h \
|
| 204 |
+
myassert.h mymath.h Array2.h model3.h Vector.h MoveSwapMatrix.h \
|
| 205 |
+
alignment.h defs.h transpair_model3.h NTables.h vocab.h ATables.h \
|
| 206 |
+
Array4.h TTables.h Globals.h transpair_model2.h transpair_model1.h \
|
| 207 |
+
getSentence.h model2.h model1.h Perplexity.h Dictionary.h \
|
| 208 |
+
transpair_modelhmm.h ForwardBackward.h Array.h hmm.h WordClasses.h \
|
| 209 |
+
HMMTables.h FlexArray.h D4Tables.h AlignTables.h collCounts.h \
|
| 210 |
+
transpair_model4.h utility.h D5Tables.h transpair_model5.h Parameter.h \
|
| 211 |
+
Pointer.h collCounts.cpp
|
| 212 |
+
debug/MoveSwapMatrix.o: MoveSwapMatrix.cpp MoveSwapMatrix.h alignment.h Vector.h \
|
| 213 |
+
mystl.h myassert.h mymath.h Array2.h defs.h transpair_model3.h \
|
| 214 |
+
NTables.h vocab.h ATables.h Array4.h TTables.h Globals.h \
|
| 215 |
+
transpair_model2.h transpair_model1.h transpair_model4.h D4Tables.h \
|
| 216 |
+
WordClasses.h transpair_model5.h D5Tables.h transpair_modelhmm.h \
|
| 217 |
+
ForwardBackward.h Array.h hmm.h getSentence.h model2.h model1.h \
|
| 218 |
+
Perplexity.h Dictionary.h HMMTables.h FlexArray.h
|
| 219 |
+
debug/myassert.o: myassert.cpp mystl.h myassert.h mymath.h Array2.h
|
| 220 |
+
debug/NTables.o: NTables.cpp NTables.h Array2.h mystl.h myassert.h mymath.h \
|
| 221 |
+
Vector.h defs.h vocab.h Parameter.h Pointer.h Globals.h
|
| 222 |
+
debug/Parameter.o: Parameter.cpp Parameter.h mystl.h myassert.h mymath.h \
|
| 223 |
+
Array2.h Pointer.h Globals.h defs.h Vector.h
|
| 224 |
+
debug/parse.o: parse.cpp defs.h utility.h Perplexity.h Vector.h mystl.h \
|
| 225 |
+
myassert.h mymath.h Array2.h Globals.h TTables.h vocab.h getSentence.h \
|
| 226 |
+
D4Tables.h WordClasses.h D5Tables.h ATables.h Array4.h Parameter.h \
|
| 227 |
+
Pointer.h
|
| 228 |
+
debug/Perplexity.o: Perplexity.cpp Perplexity.h Vector.h mystl.h myassert.h \
|
| 229 |
+
mymath.h Array2.h defs.h Globals.h
|
| 230 |
+
debug/plain2snt.o: plain2snt.cpp
|
| 231 |
+
debug/reports.o: reports.cpp defs.h vocab.h Vector.h mystl.h myassert.h mymath.h \
|
| 232 |
+
Array2.h Perplexity.h Globals.h getSentence.h TTables.h Parameter.h \
|
| 233 |
+
Pointer.h
|
| 234 |
+
debug/snt2cooc.o: snt2cooc.cpp
|
| 235 |
+
debug/snt2plain.o: snt2plain.cpp
|
| 236 |
+
debug/transpair_model3.o: transpair_model3.cpp transpair_model3.h Array2.h \
|
| 237 |
+
mystl.h myassert.h mymath.h defs.h Vector.h NTables.h vocab.h ATables.h \
|
| 238 |
+
Array4.h TTables.h Globals.h alignment.h transpair_model2.h \
|
| 239 |
+
transpair_model1.h
|
| 240 |
+
debug/transpair_model4.o: transpair_model4.cpp transpair_model4.h Array2.h \
|
| 241 |
+
mystl.h myassert.h mymath.h defs.h Vector.h NTables.h vocab.h ATables.h \
|
| 242 |
+
Array4.h TTables.h Globals.h alignment.h D4Tables.h WordClasses.h \
|
| 243 |
+
transpair_model3.h transpair_model2.h transpair_model1.h Parameter.h \
|
| 244 |
+
Pointer.h
|
| 245 |
+
debug/transpair_model5.o: transpair_model5.cpp transpair_model5.h Array2.h \
|
| 246 |
+
mystl.h myassert.h mymath.h defs.h Vector.h NTables.h vocab.h ATables.h \
|
| 247 |
+
Array4.h TTables.h Globals.h alignment.h D5Tables.h D4Tables.h \
|
| 248 |
+
WordClasses.h transpair_model4.h transpair_model3.h transpair_model2.h \
|
| 249 |
+
transpair_model1.h Parameter.h Pointer.h
|
| 250 |
+
debug/TTables.o: TTables.cpp TTables.h defs.h vocab.h Vector.h mystl.h \
|
| 251 |
+
myassert.h mymath.h Array2.h Globals.h Parameter.h Pointer.h
|
| 252 |
+
debug/utility.o: utility.cpp mymath.h
|
| 253 |
+
debug/vocab.o: vocab.cpp vocab.h defs.h Vector.h mystl.h myassert.h mymath.h \
|
| 254 |
+
Array2.h
|
| 255 |
+
#Automatically generated dependency list
|
| 256 |
+
vdebug/alignment.o: alignment.cpp alignment.h Vector.h mystl.h myassert.h \
|
| 257 |
+
mymath.h Array2.h defs.h
|
| 258 |
+
vdebug/AlignTables.o: AlignTables.cpp AlignTables.h defs.h Vector.h mystl.h \
|
| 259 |
+
myassert.h mymath.h Array2.h transpair_model1.h NTables.h vocab.h \
|
| 260 |
+
ATables.h Array4.h TTables.h Globals.h alignment.h
|
| 261 |
+
vdebug/ATables.o: ATables.cpp ATables.h defs.h Vector.h mystl.h myassert.h \
|
| 262 |
+
mymath.h Array2.h Array4.h Globals.h Parameter.h Pointer.h
|
| 263 |
+
vdebug/collCounts.o: collCounts.cpp alignment.h Vector.h mystl.h myassert.h \
|
| 264 |
+
mymath.h Array2.h defs.h transpair_model3.h NTables.h vocab.h ATables.h \
|
| 265 |
+
Array4.h TTables.h Globals.h transpair_model2.h transpair_model1.h \
|
| 266 |
+
collCounts.h MoveSwapMatrix.h D4Tables.h WordClasses.h \
|
| 267 |
+
transpair_model4.h D5Tables.h transpair_model5.h transpair_modelhmm.h \
|
| 268 |
+
ForwardBackward.h Array.h hmm.h getSentence.h model2.h model1.h \
|
| 269 |
+
Perplexity.h Dictionary.h HMMTables.h FlexArray.h Parameter.h Pointer.h
|
| 270 |
+
vdebug/Dictionary.o: Dictionary.cpp Dictionary.h Vector.h mystl.h myassert.h \
|
| 271 |
+
mymath.h Array2.h
|
| 272 |
+
vdebug/ForwardBackward.o: ForwardBackward.cpp ForwardBackward.h myassert.h \
|
| 273 |
+
Array.h Vector.h mystl.h mymath.h Array2.h Globals.h defs.h HMMTables.h \
|
| 274 |
+
FlexArray.h
|
| 275 |
+
vdebug/getSentence.o: getSentence.cpp getSentence.h Vector.h mystl.h myassert.h \
|
| 276 |
+
mymath.h Array2.h defs.h vocab.h Globals.h Parameter.h Pointer.h
|
| 277 |
+
vdebug/hmm.o: hmm.cpp hmm.h Vector.h mystl.h myassert.h mymath.h Array2.h \
|
| 278 |
+
TTables.h defs.h vocab.h Globals.h ATables.h Array4.h getSentence.h \
|
| 279 |
+
model2.h model1.h Perplexity.h Dictionary.h WordClasses.h HMMTables.h \
|
| 280 |
+
FlexArray.h Array.h ForwardBackward.h utility.h Parameter.h Pointer.h \
|
| 281 |
+
HMMTables.cpp
|
| 282 |
+
vdebug/HMMTables.o: HMMTables.cpp HMMTables.h FlexArray.h Array.h Vector.h \
|
| 283 |
+
mystl.h myassert.h mymath.h Array2.h Globals.h defs.h Parameter.h \
|
| 284 |
+
Pointer.h
|
| 285 |
+
vdebug/logprob.o: logprob.cpp logprob.h
|
| 286 |
+
vdebug/main.o: main.cpp getSentence.h Vector.h mystl.h myassert.h mymath.h \
|
| 287 |
+
Array2.h defs.h vocab.h Globals.h TTables.h model1.h Perplexity.h \
|
| 288 |
+
Dictionary.h model2.h ATables.h Array4.h model3.h MoveSwapMatrix.h \
|
| 289 |
+
alignment.h transpair_model3.h NTables.h transpair_model2.h \
|
| 290 |
+
transpair_model1.h transpair_modelhmm.h ForwardBackward.h Array.h hmm.h \
|
| 291 |
+
WordClasses.h HMMTables.h FlexArray.h D4Tables.h AlignTables.h \
|
| 292 |
+
file_spec.h utility.h Parameter.h Pointer.h D5Tables.h \
|
| 293 |
+
transpair_model4.h transpair_model5.h
|
| 294 |
+
vdebug/model1.o: model1.cpp model1.h Vector.h mystl.h myassert.h mymath.h \
|
| 295 |
+
Array2.h vocab.h defs.h TTables.h Globals.h getSentence.h Perplexity.h \
|
| 296 |
+
Dictionary.h utility.h Parameter.h Pointer.h
|
| 297 |
+
vdebug/model2.o: model2.cpp model2.h Vector.h mystl.h myassert.h mymath.h \
|
| 298 |
+
Array2.h TTables.h defs.h vocab.h Globals.h ATables.h Array4.h \
|
| 299 |
+
getSentence.h model1.h Perplexity.h Dictionary.h utility.h Parameter.h \
|
| 300 |
+
Pointer.h
|
| 301 |
+
vdebug/model2to3.o: model2to3.cpp model3.h Vector.h mystl.h myassert.h mymath.h \
|
| 302 |
+
Array2.h MoveSwapMatrix.h alignment.h defs.h transpair_model3.h \
|
| 303 |
+
NTables.h vocab.h ATables.h Array4.h TTables.h Globals.h \
|
| 304 |
+
transpair_model2.h transpair_model1.h getSentence.h model2.h model1.h \
|
| 305 |
+
Perplexity.h Dictionary.h transpair_modelhmm.h ForwardBackward.h \
|
| 306 |
+
Array.h hmm.h WordClasses.h HMMTables.h FlexArray.h D4Tables.h \
|
| 307 |
+
AlignTables.h utility.h
|
| 308 |
+
vdebug/model345-peg.o: model345-peg.cpp model3.h Vector.h mystl.h myassert.h \
|
| 309 |
+
mymath.h Array2.h MoveSwapMatrix.h alignment.h defs.h \
|
| 310 |
+
transpair_model3.h NTables.h vocab.h ATables.h Array4.h TTables.h \
|
| 311 |
+
Globals.h transpair_model2.h transpair_model1.h getSentence.h model2.h \
|
| 312 |
+
model1.h Perplexity.h Dictionary.h transpair_modelhmm.h \
|
| 313 |
+
ForwardBackward.h Array.h hmm.h WordClasses.h HMMTables.h FlexArray.h \
|
| 314 |
+
D4Tables.h AlignTables.h collCounts.h transpair_model4.h
|
| 315 |
+
vdebug/model3.o: model3.cpp model3.h Vector.h mystl.h myassert.h mymath.h \
|
| 316 |
+
Array2.h MoveSwapMatrix.h alignment.h defs.h transpair_model3.h \
|
| 317 |
+
NTables.h vocab.h ATables.h Array4.h TTables.h Globals.h \
|
| 318 |
+
transpair_model2.h transpair_model1.h getSentence.h model2.h model1.h \
|
| 319 |
+
Perplexity.h Dictionary.h transpair_modelhmm.h ForwardBackward.h \
|
| 320 |
+
Array.h hmm.h WordClasses.h HMMTables.h FlexArray.h D4Tables.h \
|
| 321 |
+
AlignTables.h collCounts.h transpair_model4.h utility.h D5Tables.h \
|
| 322 |
+
transpair_model5.h Parameter.h Pointer.h
|
| 323 |
+
vdebug/model3_viterbi.o: model3_viterbi.cpp model3.h Vector.h mystl.h myassert.h \
|
| 324 |
+
mymath.h Array2.h MoveSwapMatrix.h alignment.h defs.h \
|
| 325 |
+
transpair_model3.h NTables.h vocab.h ATables.h Array4.h TTables.h \
|
| 326 |
+
Globals.h transpair_model2.h transpair_model1.h getSentence.h model2.h \
|
| 327 |
+
model1.h Perplexity.h Dictionary.h transpair_modelhmm.h \
|
| 328 |
+
ForwardBackward.h Array.h hmm.h WordClasses.h HMMTables.h FlexArray.h \
|
| 329 |
+
D4Tables.h AlignTables.h utility.h
|
| 330 |
+
vdebug/model3_viterbi_with_tricks.o: model3_viterbi_with_tricks.cpp mystl.h \
|
| 331 |
+
myassert.h mymath.h Array2.h model3.h Vector.h MoveSwapMatrix.h \
|
| 332 |
+
alignment.h defs.h transpair_model3.h NTables.h vocab.h ATables.h \
|
| 333 |
+
Array4.h TTables.h Globals.h transpair_model2.h transpair_model1.h \
|
| 334 |
+
getSentence.h model2.h model1.h Perplexity.h Dictionary.h \
|
| 335 |
+
transpair_modelhmm.h ForwardBackward.h Array.h hmm.h WordClasses.h \
|
| 336 |
+
HMMTables.h FlexArray.h D4Tables.h AlignTables.h collCounts.h \
|
| 337 |
+
transpair_model4.h utility.h D5Tables.h transpair_model5.h Parameter.h \
|
| 338 |
+
Pointer.h collCounts.cpp
|
| 339 |
+
vdebug/MoveSwapMatrix.o: MoveSwapMatrix.cpp MoveSwapMatrix.h alignment.h Vector.h \
|
| 340 |
+
mystl.h myassert.h mymath.h Array2.h defs.h transpair_model3.h \
|
| 341 |
+
NTables.h vocab.h ATables.h Array4.h TTables.h Globals.h \
|
| 342 |
+
transpair_model2.h transpair_model1.h transpair_model4.h D4Tables.h \
|
| 343 |
+
WordClasses.h transpair_model5.h D5Tables.h transpair_modelhmm.h \
|
| 344 |
+
ForwardBackward.h Array.h hmm.h getSentence.h model2.h model1.h \
|
| 345 |
+
Perplexity.h Dictionary.h HMMTables.h FlexArray.h
|
| 346 |
+
vdebug/myassert.o: myassert.cpp mystl.h myassert.h mymath.h Array2.h
|
| 347 |
+
vdebug/NTables.o: NTables.cpp NTables.h Array2.h mystl.h myassert.h mymath.h \
|
| 348 |
+
Vector.h defs.h vocab.h Parameter.h Pointer.h Globals.h
|
| 349 |
+
vdebug/Parameter.o: Parameter.cpp Parameter.h mystl.h myassert.h mymath.h \
|
| 350 |
+
Array2.h Pointer.h Globals.h defs.h Vector.h
|
| 351 |
+
vdebug/parse.o: parse.cpp defs.h utility.h Perplexity.h Vector.h mystl.h \
|
| 352 |
+
myassert.h mymath.h Array2.h Globals.h TTables.h vocab.h getSentence.h \
|
| 353 |
+
D4Tables.h WordClasses.h D5Tables.h ATables.h Array4.h Parameter.h \
|
| 354 |
+
Pointer.h
|
| 355 |
+
vdebug/Perplexity.o: Perplexity.cpp Perplexity.h Vector.h mystl.h myassert.h \
|
| 356 |
+
mymath.h Array2.h defs.h Globals.h
|
| 357 |
+
vdebug/plain2snt.o: plain2snt.cpp
|
| 358 |
+
vdebug/reports.o: reports.cpp defs.h vocab.h Vector.h mystl.h myassert.h mymath.h \
|
| 359 |
+
Array2.h Perplexity.h Globals.h getSentence.h TTables.h Parameter.h \
|
| 360 |
+
Pointer.h
|
| 361 |
+
vdebug/snt2cooc.o: snt2cooc.cpp
|
| 362 |
+
vdebug/snt2plain.o: snt2plain.cpp
|
| 363 |
+
vdebug/transpair_model3.o: transpair_model3.cpp transpair_model3.h Array2.h \
|
| 364 |
+
mystl.h myassert.h mymath.h defs.h Vector.h NTables.h vocab.h ATables.h \
|
| 365 |
+
Array4.h TTables.h Globals.h alignment.h transpair_model2.h \
|
| 366 |
+
transpair_model1.h
|
| 367 |
+
vdebug/transpair_model4.o: transpair_model4.cpp transpair_model4.h Array2.h \
|
| 368 |
+
mystl.h myassert.h mymath.h defs.h Vector.h NTables.h vocab.h ATables.h \
|
| 369 |
+
Array4.h TTables.h Globals.h alignment.h D4Tables.h WordClasses.h \
|
| 370 |
+
transpair_model3.h transpair_model2.h transpair_model1.h Parameter.h \
|
| 371 |
+
Pointer.h
|
| 372 |
+
vdebug/transpair_model5.o: transpair_model5.cpp transpair_model5.h Array2.h \
|
| 373 |
+
mystl.h myassert.h mymath.h defs.h Vector.h NTables.h vocab.h ATables.h \
|
| 374 |
+
Array4.h TTables.h Globals.h alignment.h D5Tables.h D4Tables.h \
|
| 375 |
+
WordClasses.h transpair_model4.h transpair_model3.h transpair_model2.h \
|
| 376 |
+
transpair_model1.h Parameter.h Pointer.h
|
| 377 |
+
vdebug/TTables.o: TTables.cpp TTables.h defs.h vocab.h Vector.h mystl.h \
|
| 378 |
+
myassert.h mymath.h Array2.h Globals.h Parameter.h Pointer.h
|
| 379 |
+
vdebug/utility.o: utility.cpp mymath.h
|
| 380 |
+
vdebug/vocab.o: vocab.cpp vocab.h defs.h Vector.h mystl.h myassert.h mymath.h \
|
| 381 |
+
Array2.h
|
| 382 |
+
#Automatically generated dependency list
|
| 383 |
+
norm/alignment.o: alignment.cpp alignment.h Vector.h mystl.h myassert.h \
|
| 384 |
+
mymath.h Array2.h defs.h
|
| 385 |
+
norm/AlignTables.o: AlignTables.cpp AlignTables.h defs.h Vector.h mystl.h \
|
| 386 |
+
myassert.h mymath.h Array2.h transpair_model1.h NTables.h vocab.h \
|
| 387 |
+
ATables.h Array4.h TTables.h Globals.h alignment.h
|
| 388 |
+
norm/ATables.o: ATables.cpp ATables.h defs.h Vector.h mystl.h myassert.h \
|
| 389 |
+
mymath.h Array2.h Array4.h Globals.h Parameter.h Pointer.h
|
| 390 |
+
norm/collCounts.o: collCounts.cpp alignment.h Vector.h mystl.h myassert.h \
|
| 391 |
+
mymath.h Array2.h defs.h transpair_model3.h NTables.h vocab.h ATables.h \
|
| 392 |
+
Array4.h TTables.h Globals.h transpair_model2.h transpair_model1.h \
|
| 393 |
+
collCounts.h MoveSwapMatrix.h D4Tables.h WordClasses.h \
|
| 394 |
+
transpair_model4.h D5Tables.h transpair_model5.h transpair_modelhmm.h \
|
| 395 |
+
ForwardBackward.h Array.h hmm.h getSentence.h model2.h model1.h \
|
| 396 |
+
Perplexity.h Dictionary.h HMMTables.h FlexArray.h Parameter.h Pointer.h
|
| 397 |
+
norm/Dictionary.o: Dictionary.cpp Dictionary.h Vector.h mystl.h myassert.h \
|
| 398 |
+
mymath.h Array2.h
|
| 399 |
+
norm/ForwardBackward.o: ForwardBackward.cpp ForwardBackward.h myassert.h \
|
| 400 |
+
Array.h Vector.h mystl.h mymath.h Array2.h Globals.h defs.h HMMTables.h \
|
| 401 |
+
FlexArray.h
|
| 402 |
+
norm/getSentence.o: getSentence.cpp getSentence.h Vector.h mystl.h myassert.h \
|
| 403 |
+
mymath.h Array2.h defs.h vocab.h Globals.h Parameter.h Pointer.h
|
| 404 |
+
norm/hmm.o: hmm.cpp hmm.h Vector.h mystl.h myassert.h mymath.h Array2.h \
|
| 405 |
+
TTables.h defs.h vocab.h Globals.h ATables.h Array4.h getSentence.h \
|
| 406 |
+
model2.h model1.h Perplexity.h Dictionary.h WordClasses.h HMMTables.h \
|
| 407 |
+
FlexArray.h Array.h ForwardBackward.h utility.h Parameter.h Pointer.h \
|
| 408 |
+
HMMTables.cpp
|
| 409 |
+
norm/HMMTables.o: HMMTables.cpp HMMTables.h FlexArray.h Array.h Vector.h \
|
| 410 |
+
mystl.h myassert.h mymath.h Array2.h Globals.h defs.h Parameter.h \
|
| 411 |
+
Pointer.h
|
| 412 |
+
norm/logprob.o: logprob.cpp logprob.h
|
| 413 |
+
norm/main.o: main.cpp getSentence.h Vector.h mystl.h myassert.h mymath.h \
|
| 414 |
+
Array2.h defs.h vocab.h Globals.h TTables.h model1.h Perplexity.h \
|
| 415 |
+
Dictionary.h model2.h ATables.h Array4.h model3.h MoveSwapMatrix.h \
|
| 416 |
+
alignment.h transpair_model3.h NTables.h transpair_model2.h \
|
| 417 |
+
transpair_model1.h transpair_modelhmm.h ForwardBackward.h Array.h hmm.h \
|
| 418 |
+
WordClasses.h HMMTables.h FlexArray.h D4Tables.h AlignTables.h \
|
| 419 |
+
file_spec.h utility.h Parameter.h Pointer.h D5Tables.h \
|
| 420 |
+
transpair_model4.h transpair_model5.h
|
| 421 |
+
norm/model1.o: model1.cpp model1.h Vector.h mystl.h myassert.h mymath.h \
|
| 422 |
+
Array2.h vocab.h defs.h TTables.h Globals.h getSentence.h Perplexity.h \
|
| 423 |
+
Dictionary.h utility.h Parameter.h Pointer.h
|
| 424 |
+
norm/model2.o: model2.cpp model2.h Vector.h mystl.h myassert.h mymath.h \
|
| 425 |
+
Array2.h TTables.h defs.h vocab.h Globals.h ATables.h Array4.h \
|
| 426 |
+
getSentence.h model1.h Perplexity.h Dictionary.h utility.h Parameter.h \
|
| 427 |
+
Pointer.h
|
| 428 |
+
norm/model2to3.o: model2to3.cpp model3.h Vector.h mystl.h myassert.h mymath.h \
|
| 429 |
+
Array2.h MoveSwapMatrix.h alignment.h defs.h transpair_model3.h \
|
| 430 |
+
NTables.h vocab.h ATables.h Array4.h TTables.h Globals.h \
|
| 431 |
+
transpair_model2.h transpair_model1.h getSentence.h model2.h model1.h \
|
| 432 |
+
Perplexity.h Dictionary.h transpair_modelhmm.h ForwardBackward.h \
|
| 433 |
+
Array.h hmm.h WordClasses.h HMMTables.h FlexArray.h D4Tables.h \
|
| 434 |
+
AlignTables.h utility.h
|
| 435 |
+
norm/model345-peg.o: model345-peg.cpp model3.h Vector.h mystl.h myassert.h \
|
| 436 |
+
mymath.h Array2.h MoveSwapMatrix.h alignment.h defs.h \
|
| 437 |
+
transpair_model3.h NTables.h vocab.h ATables.h Array4.h TTables.h \
|
| 438 |
+
Globals.h transpair_model2.h transpair_model1.h getSentence.h model2.h \
|
| 439 |
+
model1.h Perplexity.h Dictionary.h transpair_modelhmm.h \
|
| 440 |
+
ForwardBackward.h Array.h hmm.h WordClasses.h HMMTables.h FlexArray.h \
|
| 441 |
+
D4Tables.h AlignTables.h collCounts.h transpair_model4.h
|
| 442 |
+
norm/model3.o: model3.cpp model3.h Vector.h mystl.h myassert.h mymath.h \
|
| 443 |
+
Array2.h MoveSwapMatrix.h alignment.h defs.h transpair_model3.h \
|
| 444 |
+
NTables.h vocab.h ATables.h Array4.h TTables.h Globals.h \
|
| 445 |
+
transpair_model2.h transpair_model1.h getSentence.h model2.h model1.h \
|
| 446 |
+
Perplexity.h Dictionary.h transpair_modelhmm.h ForwardBackward.h \
|
| 447 |
+
Array.h hmm.h WordClasses.h HMMTables.h FlexArray.h D4Tables.h \
|
| 448 |
+
AlignTables.h collCounts.h transpair_model4.h utility.h D5Tables.h \
|
| 449 |
+
transpair_model5.h Parameter.h Pointer.h
|
| 450 |
+
norm/model3_viterbi.o: model3_viterbi.cpp model3.h Vector.h mystl.h myassert.h \
|
| 451 |
+
mymath.h Array2.h MoveSwapMatrix.h alignment.h defs.h \
|
| 452 |
+
transpair_model3.h NTables.h vocab.h ATables.h Array4.h TTables.h \
|
| 453 |
+
Globals.h transpair_model2.h transpair_model1.h getSentence.h model2.h \
|
| 454 |
+
model1.h Perplexity.h Dictionary.h transpair_modelhmm.h \
|
| 455 |
+
ForwardBackward.h Array.h hmm.h WordClasses.h HMMTables.h FlexArray.h \
|
| 456 |
+
D4Tables.h AlignTables.h utility.h
|
| 457 |
+
norm/model3_viterbi_with_tricks.o: model3_viterbi_with_tricks.cpp mystl.h \
|
| 458 |
+
myassert.h mymath.h Array2.h model3.h Vector.h MoveSwapMatrix.h \
|
| 459 |
+
alignment.h defs.h transpair_model3.h NTables.h vocab.h ATables.h \
|
| 460 |
+
Array4.h TTables.h Globals.h transpair_model2.h transpair_model1.h \
|
| 461 |
+
getSentence.h model2.h model1.h Perplexity.h Dictionary.h \
|
| 462 |
+
transpair_modelhmm.h ForwardBackward.h Array.h hmm.h WordClasses.h \
|
| 463 |
+
HMMTables.h FlexArray.h D4Tables.h AlignTables.h collCounts.h \
|
| 464 |
+
transpair_model4.h utility.h D5Tables.h transpair_model5.h Parameter.h \
|
| 465 |
+
Pointer.h collCounts.cpp
|
| 466 |
+
norm/MoveSwapMatrix.o: MoveSwapMatrix.cpp MoveSwapMatrix.h alignment.h Vector.h \
|
| 467 |
+
mystl.h myassert.h mymath.h Array2.h defs.h transpair_model3.h \
|
| 468 |
+
NTables.h vocab.h ATables.h Array4.h TTables.h Globals.h \
|
| 469 |
+
transpair_model2.h transpair_model1.h transpair_model4.h D4Tables.h \
|
| 470 |
+
WordClasses.h transpair_model5.h D5Tables.h transpair_modelhmm.h \
|
| 471 |
+
ForwardBackward.h Array.h hmm.h getSentence.h model2.h model1.h \
|
| 472 |
+
Perplexity.h Dictionary.h HMMTables.h FlexArray.h
|
| 473 |
+
norm/myassert.o: myassert.cpp mystl.h myassert.h mymath.h Array2.h
|
| 474 |
+
norm/NTables.o: NTables.cpp NTables.h Array2.h mystl.h myassert.h mymath.h \
|
| 475 |
+
Vector.h defs.h vocab.h Parameter.h Pointer.h Globals.h
|
| 476 |
+
norm/Parameter.o: Parameter.cpp Parameter.h mystl.h myassert.h mymath.h \
|
| 477 |
+
Array2.h Pointer.h Globals.h defs.h Vector.h
|
| 478 |
+
norm/parse.o: parse.cpp defs.h utility.h Perplexity.h Vector.h mystl.h \
|
| 479 |
+
myassert.h mymath.h Array2.h Globals.h TTables.h vocab.h getSentence.h \
|
| 480 |
+
D4Tables.h WordClasses.h D5Tables.h ATables.h Array4.h Parameter.h \
|
| 481 |
+
Pointer.h
|
| 482 |
+
norm/Perplexity.o: Perplexity.cpp Perplexity.h Vector.h mystl.h myassert.h \
|
| 483 |
+
mymath.h Array2.h defs.h Globals.h
|
| 484 |
+
norm/plain2snt.o: plain2snt.cpp
|
| 485 |
+
norm/reports.o: reports.cpp defs.h vocab.h Vector.h mystl.h myassert.h mymath.h \
|
| 486 |
+
Array2.h Perplexity.h Globals.h getSentence.h TTables.h Parameter.h \
|
| 487 |
+
Pointer.h
|
| 488 |
+
norm/snt2cooc.o: snt2cooc.cpp
|
| 489 |
+
norm/snt2plain.o: snt2plain.cpp
|
| 490 |
+
norm/transpair_model3.o: transpair_model3.cpp transpair_model3.h Array2.h \
|
| 491 |
+
mystl.h myassert.h mymath.h defs.h Vector.h NTables.h vocab.h ATables.h \
|
| 492 |
+
Array4.h TTables.h Globals.h alignment.h transpair_model2.h \
|
| 493 |
+
transpair_model1.h
|
| 494 |
+
norm/transpair_model4.o: transpair_model4.cpp transpair_model4.h Array2.h \
|
| 495 |
+
mystl.h myassert.h mymath.h defs.h Vector.h NTables.h vocab.h ATables.h \
|
| 496 |
+
Array4.h TTables.h Globals.h alignment.h D4Tables.h WordClasses.h \
|
| 497 |
+
transpair_model3.h transpair_model2.h transpair_model1.h Parameter.h \
|
| 498 |
+
Pointer.h
|
| 499 |
+
norm/transpair_model5.o: transpair_model5.cpp transpair_model5.h Array2.h \
|
| 500 |
+
mystl.h myassert.h mymath.h defs.h Vector.h NTables.h vocab.h ATables.h \
|
| 501 |
+
Array4.h TTables.h Globals.h alignment.h D5Tables.h D4Tables.h \
|
| 502 |
+
WordClasses.h transpair_model4.h transpair_model3.h transpair_model2.h \
|
| 503 |
+
transpair_model1.h Parameter.h Pointer.h
|
| 504 |
+
norm/TTables.o: TTables.cpp TTables.h defs.h vocab.h Vector.h mystl.h \
|
| 505 |
+
myassert.h mymath.h Array2.h Globals.h Parameter.h Pointer.h
|
| 506 |
+
norm/utility.o: utility.cpp mymath.h
|
| 507 |
+
norm/vocab.o: vocab.cpp vocab.h defs.h Vector.h mystl.h myassert.h mymath.h \
|
| 508 |
+
Array2.h
|
| 509 |
+
#Automatically generated dependency list
|
| 510 |
+
profile/alignment.o: alignment.cpp alignment.h Vector.h mystl.h myassert.h \
|
| 511 |
+
mymath.h Array2.h defs.h
|
| 512 |
+
profile/AlignTables.o: AlignTables.cpp AlignTables.h defs.h Vector.h mystl.h \
|
| 513 |
+
myassert.h mymath.h Array2.h transpair_model1.h NTables.h vocab.h \
|
| 514 |
+
ATables.h Array4.h TTables.h Globals.h alignment.h
|
| 515 |
+
profile/ATables.o: ATables.cpp ATables.h defs.h Vector.h mystl.h myassert.h \
|
| 516 |
+
mymath.h Array2.h Array4.h Globals.h Parameter.h Pointer.h
|
| 517 |
+
profile/collCounts.o: collCounts.cpp alignment.h Vector.h mystl.h myassert.h \
|
| 518 |
+
mymath.h Array2.h defs.h transpair_model3.h NTables.h vocab.h ATables.h \
|
| 519 |
+
Array4.h TTables.h Globals.h transpair_model2.h transpair_model1.h \
|
| 520 |
+
collCounts.h MoveSwapMatrix.h D4Tables.h WordClasses.h \
|
| 521 |
+
transpair_model4.h D5Tables.h transpair_model5.h transpair_modelhmm.h \
|
| 522 |
+
ForwardBackward.h Array.h hmm.h getSentence.h model2.h model1.h \
|
| 523 |
+
Perplexity.h Dictionary.h HMMTables.h FlexArray.h Parameter.h Pointer.h
|
| 524 |
+
profile/Dictionary.o: Dictionary.cpp Dictionary.h Vector.h mystl.h myassert.h \
|
| 525 |
+
mymath.h Array2.h
|
| 526 |
+
profile/ForwardBackward.o: ForwardBackward.cpp ForwardBackward.h myassert.h \
|
| 527 |
+
Array.h Vector.h mystl.h mymath.h Array2.h Globals.h defs.h HMMTables.h \
|
| 528 |
+
FlexArray.h
|
| 529 |
+
profile/getSentence.o: getSentence.cpp getSentence.h Vector.h mystl.h myassert.h \
|
| 530 |
+
mymath.h Array2.h defs.h vocab.h Globals.h Parameter.h Pointer.h
|
| 531 |
+
profile/hmm.o: hmm.cpp hmm.h Vector.h mystl.h myassert.h mymath.h Array2.h \
|
| 532 |
+
TTables.h defs.h vocab.h Globals.h ATables.h Array4.h getSentence.h \
|
| 533 |
+
model2.h model1.h Perplexity.h Dictionary.h WordClasses.h HMMTables.h \
|
| 534 |
+
FlexArray.h Array.h ForwardBackward.h utility.h Parameter.h Pointer.h \
|
| 535 |
+
HMMTables.cpp
|
| 536 |
+
profile/HMMTables.o: HMMTables.cpp HMMTables.h FlexArray.h Array.h Vector.h \
|
| 537 |
+
mystl.h myassert.h mymath.h Array2.h Globals.h defs.h Parameter.h \
|
| 538 |
+
Pointer.h
|
| 539 |
+
profile/logprob.o: logprob.cpp logprob.h
|
| 540 |
+
profile/main.o: main.cpp getSentence.h Vector.h mystl.h myassert.h mymath.h \
|
| 541 |
+
Array2.h defs.h vocab.h Globals.h TTables.h model1.h Perplexity.h \
|
| 542 |
+
Dictionary.h model2.h ATables.h Array4.h model3.h MoveSwapMatrix.h \
|
| 543 |
+
alignment.h transpair_model3.h NTables.h transpair_model2.h \
|
| 544 |
+
transpair_model1.h transpair_modelhmm.h ForwardBackward.h Array.h hmm.h \
|
| 545 |
+
WordClasses.h HMMTables.h FlexArray.h D4Tables.h AlignTables.h \
|
| 546 |
+
file_spec.h utility.h Parameter.h Pointer.h D5Tables.h \
|
| 547 |
+
transpair_model4.h transpair_model5.h
|
| 548 |
+
profile/model1.o: model1.cpp model1.h Vector.h mystl.h myassert.h mymath.h \
|
| 549 |
+
Array2.h vocab.h defs.h TTables.h Globals.h getSentence.h Perplexity.h \
|
| 550 |
+
Dictionary.h utility.h Parameter.h Pointer.h
|
| 551 |
+
profile/model2.o: model2.cpp model2.h Vector.h mystl.h myassert.h mymath.h \
|
| 552 |
+
Array2.h TTables.h defs.h vocab.h Globals.h ATables.h Array4.h \
|
| 553 |
+
getSentence.h model1.h Perplexity.h Dictionary.h utility.h Parameter.h \
|
| 554 |
+
Pointer.h
|
| 555 |
+
profile/model2to3.o: model2to3.cpp model3.h Vector.h mystl.h myassert.h mymath.h \
|
| 556 |
+
Array2.h MoveSwapMatrix.h alignment.h defs.h transpair_model3.h \
|
| 557 |
+
NTables.h vocab.h ATables.h Array4.h TTables.h Globals.h \
|
| 558 |
+
transpair_model2.h transpair_model1.h getSentence.h model2.h model1.h \
|
| 559 |
+
Perplexity.h Dictionary.h transpair_modelhmm.h ForwardBackward.h \
|
| 560 |
+
Array.h hmm.h WordClasses.h HMMTables.h FlexArray.h D4Tables.h \
|
| 561 |
+
AlignTables.h utility.h
|
| 562 |
+
profile/model345-peg.o: model345-peg.cpp model3.h Vector.h mystl.h myassert.h \
|
| 563 |
+
mymath.h Array2.h MoveSwapMatrix.h alignment.h defs.h \
|
| 564 |
+
transpair_model3.h NTables.h vocab.h ATables.h Array4.h TTables.h \
|
| 565 |
+
Globals.h transpair_model2.h transpair_model1.h getSentence.h model2.h \
|
| 566 |
+
model1.h Perplexity.h Dictionary.h transpair_modelhmm.h \
|
| 567 |
+
ForwardBackward.h Array.h hmm.h WordClasses.h HMMTables.h FlexArray.h \
|
| 568 |
+
D4Tables.h AlignTables.h collCounts.h transpair_model4.h
|
| 569 |
+
profile/model3.o: model3.cpp model3.h Vector.h mystl.h myassert.h mymath.h \
|
| 570 |
+
Array2.h MoveSwapMatrix.h alignment.h defs.h transpair_model3.h \
|
| 571 |
+
NTables.h vocab.h ATables.h Array4.h TTables.h Globals.h \
|
| 572 |
+
transpair_model2.h transpair_model1.h getSentence.h model2.h model1.h \
|
| 573 |
+
Perplexity.h Dictionary.h transpair_modelhmm.h ForwardBackward.h \
|
| 574 |
+
Array.h hmm.h WordClasses.h HMMTables.h FlexArray.h D4Tables.h \
|
| 575 |
+
AlignTables.h collCounts.h transpair_model4.h utility.h D5Tables.h \
|
| 576 |
+
transpair_model5.h Parameter.h Pointer.h
|
| 577 |
+
profile/model3_viterbi.o: model3_viterbi.cpp model3.h Vector.h mystl.h myassert.h \
|
| 578 |
+
mymath.h Array2.h MoveSwapMatrix.h alignment.h defs.h \
|
| 579 |
+
transpair_model3.h NTables.h vocab.h ATables.h Array4.h TTables.h \
|
| 580 |
+
Globals.h transpair_model2.h transpair_model1.h getSentence.h model2.h \
|
| 581 |
+
model1.h Perplexity.h Dictionary.h transpair_modelhmm.h \
|
| 582 |
+
ForwardBackward.h Array.h hmm.h WordClasses.h HMMTables.h FlexArray.h \
|
| 583 |
+
D4Tables.h AlignTables.h utility.h
|
| 584 |
+
profile/model3_viterbi_with_tricks.o: model3_viterbi_with_tricks.cpp mystl.h \
|
| 585 |
+
myassert.h mymath.h Array2.h model3.h Vector.h MoveSwapMatrix.h \
|
| 586 |
+
alignment.h defs.h transpair_model3.h NTables.h vocab.h ATables.h \
|
| 587 |
+
Array4.h TTables.h Globals.h transpair_model2.h transpair_model1.h \
|
| 588 |
+
getSentence.h model2.h model1.h Perplexity.h Dictionary.h \
|
| 589 |
+
transpair_modelhmm.h ForwardBackward.h Array.h hmm.h WordClasses.h \
|
| 590 |
+
HMMTables.h FlexArray.h D4Tables.h AlignTables.h collCounts.h \
|
| 591 |
+
transpair_model4.h utility.h D5Tables.h transpair_model5.h Parameter.h \
|
| 592 |
+
Pointer.h collCounts.cpp
|
| 593 |
+
profile/MoveSwapMatrix.o: MoveSwapMatrix.cpp MoveSwapMatrix.h alignment.h Vector.h \
|
| 594 |
+
mystl.h myassert.h mymath.h Array2.h defs.h transpair_model3.h \
|
| 595 |
+
NTables.h vocab.h ATables.h Array4.h TTables.h Globals.h \
|
| 596 |
+
transpair_model2.h transpair_model1.h transpair_model4.h D4Tables.h \
|
| 597 |
+
WordClasses.h transpair_model5.h D5Tables.h transpair_modelhmm.h \
|
| 598 |
+
ForwardBackward.h Array.h hmm.h getSentence.h model2.h model1.h \
|
| 599 |
+
Perplexity.h Dictionary.h HMMTables.h FlexArray.h
|
| 600 |
+
profile/myassert.o: myassert.cpp mystl.h myassert.h mymath.h Array2.h
|
| 601 |
+
profile/NTables.o: NTables.cpp NTables.h Array2.h mystl.h myassert.h mymath.h \
|
| 602 |
+
Vector.h defs.h vocab.h Parameter.h Pointer.h Globals.h
|
| 603 |
+
profile/Parameter.o: Parameter.cpp Parameter.h mystl.h myassert.h mymath.h \
|
| 604 |
+
Array2.h Pointer.h Globals.h defs.h Vector.h
|
| 605 |
+
profile/parse.o: parse.cpp defs.h utility.h Perplexity.h Vector.h mystl.h \
|
| 606 |
+
myassert.h mymath.h Array2.h Globals.h TTables.h vocab.h getSentence.h \
|
| 607 |
+
D4Tables.h WordClasses.h D5Tables.h ATables.h Array4.h Parameter.h \
|
| 608 |
+
Pointer.h
|
| 609 |
+
profile/Perplexity.o: Perplexity.cpp Perplexity.h Vector.h mystl.h myassert.h \
|
| 610 |
+
mymath.h Array2.h defs.h Globals.h
|
| 611 |
+
profile/plain2snt.o: plain2snt.cpp
|
| 612 |
+
profile/reports.o: reports.cpp defs.h vocab.h Vector.h mystl.h myassert.h mymath.h \
|
| 613 |
+
Array2.h Perplexity.h Globals.h getSentence.h TTables.h Parameter.h \
|
| 614 |
+
Pointer.h
|
| 615 |
+
profile/snt2cooc.o: snt2cooc.cpp
|
| 616 |
+
profile/snt2plain.o: snt2plain.cpp
|
| 617 |
+
profile/transpair_model3.o: transpair_model3.cpp transpair_model3.h Array2.h \
|
| 618 |
+
mystl.h myassert.h mymath.h defs.h Vector.h NTables.h vocab.h ATables.h \
|
| 619 |
+
Array4.h TTables.h Globals.h alignment.h transpair_model2.h \
|
| 620 |
+
transpair_model1.h
|
| 621 |
+
profile/transpair_model4.o: transpair_model4.cpp transpair_model4.h Array2.h \
|
| 622 |
+
mystl.h myassert.h mymath.h defs.h Vector.h NTables.h vocab.h ATables.h \
|
| 623 |
+
Array4.h TTables.h Globals.h alignment.h D4Tables.h WordClasses.h \
|
| 624 |
+
transpair_model3.h transpair_model2.h transpair_model1.h Parameter.h \
|
| 625 |
+
Pointer.h
|
| 626 |
+
profile/transpair_model5.o: transpair_model5.cpp transpair_model5.h Array2.h \
|
| 627 |
+
mystl.h myassert.h mymath.h defs.h Vector.h NTables.h vocab.h ATables.h \
|
| 628 |
+
Array4.h TTables.h Globals.h alignment.h D5Tables.h D4Tables.h \
|
| 629 |
+
WordClasses.h transpair_model4.h transpair_model3.h transpair_model2.h \
|
| 630 |
+
transpair_model1.h Parameter.h Pointer.h
|
| 631 |
+
profile/TTables.o: TTables.cpp TTables.h defs.h vocab.h Vector.h mystl.h \
|
| 632 |
+
myassert.h mymath.h Array2.h Globals.h Parameter.h Pointer.h
|
| 633 |
+
profile/utility.o: utility.cpp mymath.h
|
| 634 |
+
profile/vocab.o: vocab.cpp vocab.h defs.h Vector.h mystl.h myassert.h mymath.h \
|
| 635 |
+
Array2.h
|
tools/giza-pp/GIZA++-v2/file_spec.h
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
EGYPT Toolkit for Statistical Machine Translation
|
| 4 |
+
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
| 5 |
+
|
| 6 |
+
This program is free software; you can redistribute it and/or
|
| 7 |
+
modify it under the terms of the GNU General Public License
|
| 8 |
+
as published by the Free Software Foundation; either version 2
|
| 9 |
+
of the License, or (at your option) any later version.
|
| 10 |
+
|
| 11 |
+
This program is distributed in the hope that it will be useful,
|
| 12 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 13 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 14 |
+
GNU General Public License for more details.
|
| 15 |
+
|
| 16 |
+
You should have received a copy of the GNU General Public License
|
| 17 |
+
along with this program; if not, write to the Free Software
|
| 18 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 19 |
+
USA.
|
| 20 |
+
|
| 21 |
+
*/
|
| 22 |
+
#ifndef FILE_SPEC_H
|
| 23 |
+
#define FILE_SPEC_H
|
| 24 |
+
|
| 25 |
+
#include <time.h>
|
| 26 |
+
#include <stdlib.h>
|
| 27 |
+
#include <string.h>
|
| 28 |
+
#include <stdio.h>
|
| 29 |
+
|
| 30 |
+
/* This function returns a string, locally called file_spec. This
|
| 31 |
+
string is the concatenation of the date and time of execution
|
| 32 |
+
and the user who is performing the execution */
|
| 33 |
+
/* Originally implemented in C by Yaser Al-Onaizan;
|
| 34 |
+
editions for C++ and formatting by Noah A. Smith, 9 July 1999 */
|
| 35 |
+
|
| 36 |
+
/* Builds the run identifier "<year>-<mon>-<day>.<hhmmss>.<user>".
 *
 * The date/time part comes from the local time at the moment of the call;
 * the user part is the value of the USER environment variable, or the
 * literal "no_user" when that variable is not set.
 *
 * Returns a NUL-terminated string allocated with malloc(); the caller owns
 * it and releases it with free().  Returns NULL when the local time cannot
 * be obtained, the stamp cannot be formatted, or the allocation fails
 * (previously these cases dereferenced a null pointer).
 *
 * The year field is printed straight from tm_year, i.e. as years since
 * 1900 (three digits from the year 2000 on).  That is kept unchanged on
 * purpose so that generated names stay identical to earlier runs.
 *
 * NOTE(review): this is a non-inline definition living in a header, so
 * including file_spec.h from more than one translation unit would give
 * duplicate-symbol link errors.
 */
char *Get_File_Spec (){
  /* A real stamp needs fewer than 20 bytes; the buffer is sized so that six
     ints printed at full width still fit, and snprintf() bounds the write in
     any case. */
  char time_stmp[96];
  const char *user;
  char *file_spec;
  size_t stamp_len;
  size_t user_len;

  const time_t t = time(NULL);
  const struct tm *local = localtime(&t);
  if (!local) { return NULL; }

  const int written = snprintf(time_stmp, sizeof(time_stmp),
                               "%02d-%02d-%02d.%02d%02d%02d.",
                               local->tm_year, (local->tm_mon + 1),
                               local->tm_mday, local->tm_hour,
                               local->tm_min, local->tm_sec);
  /* Negative: encoding error.  >= size: output would have been truncated. */
  if (written < 0 || static_cast<size_t>(written) >= sizeof(time_stmp)) {
    return NULL;
  }

  user = getenv("USER");
  if (!user) { user = "no_user"; }

  stamp_len = static_cast<size_t>(written);
  user_len = strlen(user);

  file_spec = static_cast<char *>(malloc(stamp_len + user_len + 1));
  if (!file_spec) { return NULL; }

  /* Both lengths are known, so copy directly instead of rescanning the
     destination with strcat(); the +1 carries the terminating NUL of user. */
  memcpy(file_spec, time_stmp, stamp_len);
  memcpy(file_spec + stamp_len, user, user_len + 1);
  return file_spec;
}
|
| 59 |
+
|
| 60 |
+
#endif
|
tools/giza-pp/GIZA++-v2/getSentence.cpp
ADDED
|
@@ -0,0 +1,340 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
EGYPT Toolkit for Statistical Machine Translation
|
| 4 |
+
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
| 5 |
+
|
| 6 |
+
This program is free software; you can redistribute it and/or
|
| 7 |
+
modify it under the terms of the GNU General Public License
|
| 8 |
+
as published by the Free Software Foundation; either version 2
|
| 9 |
+
of the License, or (at your option) any later version.
|
| 10 |
+
|
| 11 |
+
This program is distributed in the hope that it will be useful,
|
| 12 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 13 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 14 |
+
GNU General Public License for more details.
|
| 15 |
+
|
| 16 |
+
You should have received a copy of the GNU General Public License
|
| 17 |
+
along with this program; if not, write to the Free Software
|
| 18 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 19 |
+
USA.
|
| 20 |
+
|
| 21 |
+
*/
|
| 22 |
+
/* --------------------------------------------------------------------------*
|
| 23 |
+
* *
|
| 24 |
+
* Module : getSentence *
|
| 25 |
+
* *
|
| 26 |
+
* Method Definitions File: getSentence.cc *
|
| 27 |
+
* *
|
| 28 |
+
* Objective: Defines classes and methods for handling I/O for the parallel *
|
| 29 |
+
* corpus. *
|
| 30 |
+
*****************************************************************************/
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
#include "getSentence.h"
|
| 34 |
+
#include <iostream>
|
| 35 |
+
#include <sstream>
|
| 36 |
+
#include "Parameter.h"
|
| 37 |
+
#include "errno.h"
|
| 38 |
+
|
| 39 |
+
int PrintedTooLong=0;
|
| 40 |
+
|
| 41 |
+
/* -------------- Method Definitions for Class sentenceHandler --------------*/
|
| 42 |
+
|
| 43 |
+
GLOBAL_PARAMETER(double,ManlexMAX_MULTIPLICITY,"manlexMAX_MULTIPLICITY","",PARLEV_EM,20.0);
|
| 44 |
+
GLOBAL_PARAMETER(double,Manlexfactor1,"manlexfactor1","",PARLEV_EM,0.0);
|
| 45 |
+
GLOBAL_PARAMETER(double,Manlexfactor2,"manlexfactor2","",PARLEV_EM,0.0);
|
| 46 |
+
|
| 47 |
+
sentenceHandler::sentenceHandler(const char* filename, vcbList* elist,
                                 vcbList* flist) : realCount(0)
// Constructor: opens the corpus file `filename` and resets all reader state
// (buffer, counters, sequential pair number).  The process exits with status 1
// if the file cannot be opened.
//
// When both `elist` and `flist` are given, the whole corpus is read once via
// getNextSentence() so that the vocabulary frequencies are accumulated and
// totalPairs1 / totalPairs2 are filled in.  If any pair in that pass carries a
// negative occurrence count, a per-pair `realCount` table (initialised to 1.0)
// is allocated; otherwise `realCount` stays null.
{
  // Reader state starts out empty and positioned at the first pair.
  inputFilename   = filename;
  readflag        = false;
  allInMemory     = false;
  currentSentence = 0;
  noSentInBuffer  = 0;
  pair_no         = 0;
  totalPairs1     = 0;
  totalPairs2     = 0;
  Buffer.clear();

  inputFile = new ifstream(filename);
  if (!(*inputFile)) {
    cerr << "\nERROR:(a) Cannot open " << filename;
    exit(1);
  }

  bool sawNegativeCount = false;
  if (elist && flist) {
    cout << "Calculating vocabulary frequencies from corpus " << filename << '\n';
    sentPair pair;
    while (getNextSentence(pair, elist, flist)) {
      ++totalPairs1;
      // NOTE: this value might change during training
      // for words from the manual dictionary, yet this is ignored!
      totalPairs2 += pair.realCount;
      if (pair.noOcc < 0)
        sawNegativeCount = true;
    }
  }

  if (!sawNegativeCount) {
    realCount = 0;
    return;
  }

  cerr << "WARNING: corpus contains negative occurrency frequencies => these are interpreted as entries of a manual dictionary.\n";
  realCount = new Vector<double>(totalPairs1, 1.0);
}
|
| 91 |
+
|
| 92 |
+
void sentenceHandler::rewind()
|
| 93 |
+
{
|
| 94 |
+
currentSentence = 0;
|
| 95 |
+
readflag = false ;
|
| 96 |
+
if (!allInMemory ||
|
| 97 |
+
!(Buffer.size() >= 1 && Buffer[currentSentence].sentenceNo == 1)){
|
| 98 |
+
// check if the buffer doe not already has the first chunk of pairs
|
| 99 |
+
if (Buffer.size() > 0)
|
| 100 |
+
cerr << ' ' << Buffer[currentSentence].sentenceNo << '\n';
|
| 101 |
+
// totalPairs = 0 ;
|
| 102 |
+
pair_no = 0 ;
|
| 103 |
+
noSentInBuffer = 0 ;
|
| 104 |
+
Buffer.clear();
|
| 105 |
+
}
|
| 106 |
+
if (!allInMemory){
|
| 107 |
+
delete inputFile;
|
| 108 |
+
inputFile = new ifstream(inputFilename);
|
| 109 |
+
if(!(*inputFile)){
|
| 110 |
+
cerr << "\nERROR:(b) Cannot open " << inputFilename << " " << (int)errno;
|
| 111 |
+
}
|
| 112 |
+
}
|
| 113 |
+
}
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
bool sentenceHandler::getNextSentence(sentPair& sent, vcbList* elist, vcbList* flist)
// Hands out the next sentence pair in `sent`, refilling the in-memory buffer
// from the corpus file (up to TRAIN_BUFFER_SIZE pairs) when it is exhausted.
// Returns false when no further pair is available.  If both vocabulary lists
// are given, the frequency of every token read during a refill is added to
// them; a token id outside the vocabulary terminates the program.
{
  // A previous call already ran off the end of the corpus: rewind so the next
  // pass starts from the beginning and report "nothing read" for this call.
  if (readflag) {
    cerr << "Attempting to read from the end of corpus, rewinding\n";
    rewind();
    return false;
  }

  if (currentSentence >= noSentInBuffer) {
    // Buffer is used up; with the whole corpus in memory that means "done".
    if (allInMemory)
      return false;

    noSentInBuffer = 0;
    currentSentence = 0;
    Buffer.clear();
    cout << "Reading more sentence pairs into memory ... \n";

    sentPair pair;
    while ((noSentInBuffer < TRAIN_BUFFER_SIZE) && readNextSentence(pair)) {
      // Pairs whose length ratio exceeds the fertility limit are cut down so
      // that both sides have the same (shorter) length.
      if ((pair.fSent.size()-1) > (MAX_FERTILITY-1) * (pair.eSent.size()-1)) {
        cerr << "WARNING: The following sentence pair has source/target sentence length ration more than\n"
             << "the maximum allowed limit for a source word fertility\n"
             << " source length = " << pair.eSent.size()-1
             << " target length = " << pair.fSent.size()-1
             << " ratio " << double(pair.fSent.size()-1) / (pair.eSent.size()-1)
             << " ferility limit : " << MAX_FERTILITY-1 << '\n';
        cerr << "Shortening sentence \n";
        cerr << pair;
        pair.eSent.resize(min(pair.eSent.size(), pair.fSent.size()));
        pair.fSent.resize(min(pair.eSent.size(), pair.fSent.size()));
      }

      Buffer.push_back(pair);

      if (elist && flist) {
        if (elist->size() > 0) {
          // Source side: position 0 (the null word) is counted as well.
          for (WordIndex e = 0; e < pair.eSent.size(); ++e) {
            if (pair.eSent[e] >= elist->uniqTokens()) {
              if (PrintedTooLong++ < 100)
                cerr << "ERROR: source word " << pair.eSent[e] << " is not in the vocabulary list \n";
              exit(-1);
            }
            elist->incFreq(pair.eSent[e], pair.realCount);
          }
        }
        if (flist->size() > 0) {
          // Target side: position 0 is only a placeholder and is skipped.
          for (WordIndex f = 1; f < pair.fSent.size(); ++f) {
            if (pair.fSent[f] >= flist->uniqTokens()) {
              cerr << "ERROR: target word " << pair.fSent[f] << " is not in the vocabulary list \n";
              exit(-1);
            }
            flist->incFreq(pair.fSent[f], pair.realCount);
          }
        }
      }

      ++noSentInBuffer;
    }

    // Reaching EOF while the buffer still starts at pair 1 means the entire
    // corpus fits into a single buffer.
    if (inputFile->eof()) {
      allInMemory = (Buffer.size() >= 1 &&
                     Buffer[currentSentence].sentenceNo == 1);
      if (allInMemory)
        cout << "Corpus fits in memory, corpus has: " << Buffer.size()
             << " sentence pairs.\n";
    }
  }

  if (noSentInBuffer <= 0) {
    // Nothing could be read: remember that the end of the corpus was hit.
    readflag = true;
    return false;
  }

  sent = Buffer[currentSentence++];

  // Manual-dictionary entries (negative occurrence count) get their effective
  // count from the fixed factors or from the per-pair realCount table.
  if (sent.noOcc < 0 && realCount) {
    if (Manlexfactor1 && sent.noOcc == -1.0)
      sent.realCount = Manlexfactor1;
    else if (Manlexfactor2 && sent.noOcc == -2.0)
      sent.realCount = Manlexfactor2;
    else
      sent.realCount = (*realCount)[sent.getSentenceNo()-1];
  }
  return true;
}
|
| 191 |
+
bool sentenceHandler::readNextSentence(sentPair& sent)
|
| 192 |
+
/* This method reads in a new pair of sentences, each pair is read from the
|
| 193 |
+
corpus file as line triples. The first line the no of times this line
|
| 194 |
+
pair occured in the corpus, the second line is the source sentence and
|
| 195 |
+
the third is the target sentence. The sentences are represented by a space
|
| 196 |
+
separated positive integer token ids. */
|
| 197 |
+
{
|
| 198 |
+
|
| 199 |
+
string line;
|
| 200 |
+
bool fail(false) ;
|
| 201 |
+
|
| 202 |
+
sent.clear();
|
| 203 |
+
if (getline(*inputFile, line)){
|
| 204 |
+
istringstream buffer(line);
|
| 205 |
+
buffer >> sent.noOcc;
|
| 206 |
+
if( sent.noOcc<0 )
|
| 207 |
+
{
|
| 208 |
+
if( realCount )
|
| 209 |
+
{
|
| 210 |
+
if( Manlexfactor1 && sent.noOcc==-1.0 )
|
| 211 |
+
sent.realCount=Manlexfactor1;
|
| 212 |
+
else if( Manlexfactor2 && sent.noOcc==-2.0 )
|
| 213 |
+
sent.realCount=Manlexfactor2;
|
| 214 |
+
else
|
| 215 |
+
{
|
| 216 |
+
sent.realCount=(*realCount)[pair_no];
|
| 217 |
+
}
|
| 218 |
+
}
|
| 219 |
+
else
|
| 220 |
+
sent.realCount=1.0;
|
| 221 |
+
}
|
| 222 |
+
else
|
| 223 |
+
sent.realCount=sent.noOcc;
|
| 224 |
+
}
|
| 225 |
+
else {
|
| 226 |
+
fail = true ;;
|
| 227 |
+
}
|
| 228 |
+
if (getline(*inputFile, line)){
|
| 229 |
+
istringstream buffer(line);
|
| 230 |
+
WordIndex w; // w is a local variabe for token id
|
| 231 |
+
sent.eSent.push_back(0); // each source word is assumed to have 0 ==
|
| 232 |
+
// a null word (id 0) at the begining of the sentence.
|
| 233 |
+
while(buffer>>w){ // read source sentece , word by word .
|
| 234 |
+
if (sent.eSent.size() < MAX_SENTENCE_LENGTH)
|
| 235 |
+
sent.eSent.push_back(w);
|
| 236 |
+
else {
|
| 237 |
+
if( PrintedTooLong++<100)
|
| 238 |
+
cerr << "{WARNING:(a)truncated sentence "<<pair_no<<"}";
|
| 239 |
+
//cerr << "ERROR: getSentence.cc:getNextSentence(): sentence exceeds preset length limit of : " << MAX_SENTENCE_LENGTH << '\n';
|
| 240 |
+
//cerr << "The following sentence will be truncated\n" << line;
|
| 241 |
+
break ;
|
| 242 |
+
}
|
| 243 |
+
}
|
| 244 |
+
}
|
| 245 |
+
else {
|
| 246 |
+
fail = true ;
|
| 247 |
+
}
|
| 248 |
+
if (getline(*inputFile, line)){
|
| 249 |
+
istringstream buffer(line);
|
| 250 |
+
WordIndex w; // w is a local variabe for token id
|
| 251 |
+
sent.fSent.push_back(0); //0 is inserted for program uniformity
|
| 252 |
+
while(buffer>>w){ // read target sentece , word by word .
|
| 253 |
+
if (sent.fSent.size() < MAX_SENTENCE_LENGTH)
|
| 254 |
+
sent.fSent.push_back(w);
|
| 255 |
+
else {
|
| 256 |
+
if( PrintedTooLong++<100)
|
| 257 |
+
cerr << "{WARNING:(b)truncated sentence "<<pair_no<<"}";
|
| 258 |
+
//cerr << "ERROR: getSentence.cc:getNextSentence(): sentence exceeds preset length limit of : " << MAX_SENTENCE_LENGTH << '\n';
|
| 259 |
+
//cerr << "The following sentence will be truncated\n" << line;
|
| 260 |
+
break ;
|
| 261 |
+
}
|
| 262 |
+
}
|
| 263 |
+
}
|
| 264 |
+
else {
|
| 265 |
+
fail = true ;
|
| 266 |
+
}
|
| 267 |
+
if (fail){
|
| 268 |
+
sent.eSent.clear();
|
| 269 |
+
sent.fSent.clear();
|
| 270 |
+
sent.sentenceNo = 0 ;
|
| 271 |
+
sent.noOcc = 0 ;
|
| 272 |
+
sent.realCount=0;
|
| 273 |
+
return(false);
|
| 274 |
+
}
|
| 275 |
+
if( sent.eSent.size()==1||sent.fSent.size()==1 )
|
| 276 |
+
cerr << "ERROR: Forbidden zero sentence length " << sent.sentenceNo << endl;
|
| 277 |
+
sent.sentenceNo = ++pair_no;
|
| 278 |
+
if(pair_no % 100000 == 0)
|
| 279 |
+
cout << "[sent:" << sent.sentenceNo << "]"<< '\n';
|
| 280 |
+
return true;
|
| 281 |
+
}
|
| 282 |
+
|
| 283 |
+
// Grid search for lambda over 1.0, 1.33, 1.66, ... (below
// ManlexMAX_MULTIPLICITY): returns the grid value for which
//   sum_i vd[i]*exp(lambda*vd[i]) / (exp(lambda*vd[i]) - 1)
// is closest to 1.  With an empty grid the result is 1.0.
double optimize_lambda(Vector<double>&vd)
{
  Vector<double> residuals;  // |sum - 1| for every grid point, in grid order
  for (double lambda = 1.0; lambda < ManlexMAX_MULTIPLICITY; lambda += 0.33) {
    double total = 0.0;
    for (unsigned int k = 0; k < vd.size(); ++k)
      total += vd[k]*exp(lambda*vd[k])/(exp(lambda*vd[k])-1.0);
    residuals.push_back(fabs(total-1.0));
  }

  // Map the index of the smallest residual back to its lambda value.
  const double bestIndex =
      double(min_element(residuals.begin(), residuals.end()) - residuals.begin());
  const double lam = bestIndex*0.33+1.0;

  if (lam < 1.0) {
    cerr << "ERROR: lambda is smaller than one: " << lam << endl;
    for (unsigned int k = 0; k < vd.size(); ++k)
      cerr << vd[k] << ' ';
    cerr << endl;
  }
  return lam;
}
|
| 305 |
+
|
| 306 |
+
void sentenceHandler::setProbOfSentence(const sentPair&s,double d)
|
| 307 |
+
{
|
| 308 |
+
if( realCount==0 )
|
| 309 |
+
return;
|
| 310 |
+
else
|
| 311 |
+
{
|
| 312 |
+
if( s.noOcc<=0 )
|
| 313 |
+
{
|
| 314 |
+
double ed=exp(d);
|
| 315 |
+
if( oldPairs.size()>0&&(oldPairs.back().get_eSent()!=s.get_eSent()||oldPairs.back().getSentenceNo()>=s.getSentenceNo()) )
|
| 316 |
+
{
|
| 317 |
+
double lambda=optimize_lambda(oldProbs);
|
| 318 |
+
for(unsigned int i=0;i<oldPairs.size();++i)
|
| 319 |
+
{
|
| 320 |
+
if( oldProbs[i]<1e-5 )
|
| 321 |
+
(*realCount)[oldPairs[i].getSentenceNo()-1]=1.0;
|
| 322 |
+
else
|
| 323 |
+
(*realCount)[oldPairs[i].getSentenceNo()-1]=lambda*oldProbs[i]/(1-exp(-lambda*oldProbs[i]));
|
| 324 |
+
}
|
| 325 |
+
oldPairs.clear();
|
| 326 |
+
oldProbs.clear();
|
| 327 |
+
}
|
| 328 |
+
oldPairs.push_back(s);
|
| 329 |
+
oldProbs.push_back(ed);
|
| 330 |
+
}
|
| 331 |
+
}
|
| 332 |
+
}
|
| 333 |
+
|
| 334 |
+
/* ------------- End of Method Definition of Class sentenceHandler ----------*/
|
| 335 |
+
|
| 336 |
+
|
| 337 |
+
|
| 338 |
+
|
| 339 |
+
|
| 340 |
+
|
tools/giza-pp/GIZA++-v2/getSentence.h
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
EGYPT Toolkit for Statistical Machine Translation
|
| 4 |
+
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
| 5 |
+
|
| 6 |
+
This program is free software; you can redistribute it and/or
|
| 7 |
+
modify it under the terms of the GNU General Public License
|
| 8 |
+
as published by the Free Software Foundation; either version 2
|
| 9 |
+
of the License, or (at your option) any later version.
|
| 10 |
+
|
| 11 |
+
This program is distributed in the hope that it will be useful,
|
| 12 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 13 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 14 |
+
GNU General Public License for more details.
|
| 15 |
+
|
| 16 |
+
You should have received a copy of the GNU General Public License
|
| 17 |
+
along with this program; if not, write to the Free Software
|
| 18 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 19 |
+
USA.
|
| 20 |
+
|
| 21 |
+
*/
|
| 22 |
+
/* --------------------------------------------------------------------------*
|
| 23 |
+
* *
|
| 24 |
+
* Module : getSentence *
|
| 25 |
+
* *
|
| 26 |
+
* Prototypes File: getSentence.h *
|
| 27 |
+
* *
|
| 28 |
+
* Objective: Defines classes and methods for handling I/O for the parallel *
|
| 29 |
+
* corpus. *
|
| 30 |
+
*****************************************************************************/
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
#ifndef _sentenceHandler_h
|
| 37 |
+
#define _sentenceHandler_h 1
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
#include <iostream>
|
| 41 |
+
#include <fstream>
|
| 42 |
+
#include <string>
|
| 43 |
+
#include "Vector.h"
|
| 44 |
+
#include "defs.h"
|
| 45 |
+
#include "vocab.h"
|
| 46 |
+
#include "Globals.h"
|
| 47 |
+
/*----------------------- Class Prototype Definition ------------------------*
|
| 48 |
+
Class Name: sentenceHandler
|
| 49 |
+
Objective: This class is defined to handle training sentence pairs from the
|
| 50 |
+
parallel corpus. Each pair has: a target sentence, called here French; a
|
| 51 |
+
source sentence, called here English; and an integer number denoting
|
| 52 |
+
the number of times this pair occurred in the training corpus. Both source and
|
| 53 |
+
target sentences are represented as integer vectors (variable size arrays);
|
| 54 |
+
each entry is a numeric value which is the token id for the particular token
|
| 55 |
+
in the sentence.
|
| 56 |
+
|
| 57 |
+
*---------------------------------------------------------------------------*/
|
| 58 |
+
|
| 59 |
+
class sentPair{
|
| 60 |
+
public:
|
| 61 |
+
int sentenceNo ;
|
| 62 |
+
float noOcc;
|
| 63 |
+
float realCount;
|
| 64 |
+
Vector<WordIndex> eSent ;
|
| 65 |
+
Vector<WordIndex> fSent;
|
| 66 |
+
|
| 67 |
+
public:
|
| 68 |
+
sentPair(){};
|
| 69 |
+
void clear(){ eSent.clear(); fSent.clear(); noOcc=0; realCount=0; sentenceNo=0;};
|
| 70 |
+
const Vector<WordIndex>&get_eSent()const
|
| 71 |
+
{ return eSent; }
|
| 72 |
+
const Vector<WordIndex>&get_fSent()const
|
| 73 |
+
{ return fSent; }
|
| 74 |
+
int getSentenceNo()const
|
| 75 |
+
{ return sentenceNo; }
|
| 76 |
+
double getCount()const
|
| 77 |
+
{ return realCount; }
|
| 78 |
+
};
|
| 79 |
+
|
| 80 |
+
// Prints a sentence pair: a header line with number and occurrence count (plus
// the used count when it differs), then all source token ids on one line and
// the target token ids, starting at index 1, on the next.
inline ostream&operator<<(ostream&of,const sentPair&s)
{
  of << "Sent No: " << s.sentenceNo << " , No. Occurrences: " << s.noOcc << '\n';
  if( s.noOcc!=s.realCount )
    of << " Used No. Occurrences: " << s.realCount << '\n';

  for(unsigned int e=0; e < s.eSent.size(); ++e)
    of << s.eSent[e] << ' ';
  of << '\n';

  for(unsigned int f=1; f < s.fSent.size(); ++f)
    of << s.fSent[f] << ' ';
  return of << '\n';
}
|
| 94 |
+
|
| 95 |
+
class sentenceHandler{
|
| 96 |
+
public:
|
| 97 |
+
const char * inputFilename; // parallel corpus file name, similar for all
|
| 98 |
+
// sentence pair objects
|
| 99 |
+
ifstream *inputFile; // parallel corpus file handler
|
| 100 |
+
Vector<sentPair> Buffer;
|
| 101 |
+
int noSentInBuffer ;
|
| 102 |
+
int currentSentence ;
|
| 103 |
+
int totalPairs1 ;
|
| 104 |
+
double totalPairs2;
|
| 105 |
+
bool readflag ; // true if you reach the end of file
|
| 106 |
+
bool allInMemory ;
|
| 107 |
+
int pair_no ;
|
| 108 |
+
Vector<double> *realCount;
|
| 109 |
+
|
| 110 |
+
Vector<sentPair> oldPairs;
|
| 111 |
+
Vector<double> oldProbs;
|
| 112 |
+
sentenceHandler(const char* filename, vcbList* elist=0, vcbList* flist=0);
|
| 113 |
+
void rewind();
|
| 114 |
+
bool getNextSentence(sentPair&, vcbList* = 0, vcbList* = 0); // will be defined in the definition file, this
|
| 115 |
+
int getTotalNoPairs1()const {return totalPairs1;};
|
| 116 |
+
double getTotalNoPairs2()const {return totalPairs2;};
|
| 117 |
+
// method will read the next pair of sentence from memory buffer
|
| 118 |
+
bool readNextSentence(sentPair&); // will be defined in the definition file, this
|
| 119 |
+
void setProbOfSentence(const sentPair&s,double d);
|
| 120 |
+
};
|
| 121 |
+
|
| 122 |
+
#endif
|
| 123 |
+
|
tools/giza-pp/GIZA++-v2/hmm.cpp
ADDED
|
@@ -0,0 +1,405 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
Copyright (C) 1998,1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
|
| 4 |
+
|
| 5 |
+
This file is part of GIZA++ ( extension of GIZA ).
|
| 6 |
+
|
| 7 |
+
This program is free software; you can redistribute it and/or
|
| 8 |
+
modify it under the terms of the GNU General Public License
|
| 9 |
+
as published by the Free Software Foundation; either version 2
|
| 10 |
+
of the License, or (at your option) any later version.
|
| 11 |
+
|
| 12 |
+
This program is distributed in the hope that it will be useful,
|
| 13 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 14 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 15 |
+
GNU General Public License for more details.
|
| 16 |
+
|
| 17 |
+
You should have received a copy of the GNU General Public License
|
| 18 |
+
along with this program; if not, write to the Free Software
|
| 19 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 20 |
+
USA.
|
| 21 |
+
|
| 22 |
+
*/
|
| 23 |
+
#include "hmm.h"
|
| 24 |
+
#include "Globals.h"
|
| 25 |
+
#include "utility.h"
|
| 26 |
+
#include "HMMTables.h"
|
| 27 |
+
#include "ForwardBackward.h"
|
| 28 |
+
#include "Parameter.h"
|
| 29 |
+
|
| 30 |
+
#define CLASSIFY(i,empty,ianf) bool empty=(i>=l); unsigned int ianf=(i%l);
|
| 31 |
+
#define CLASSIFY2(i,ianf) unsigned int ianf=(i%l);
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
short PredictionInAlignments=0;
|
| 35 |
+
short UniformEntryExit=3;
|
| 36 |
+
short HMMTrainingSpecialFlags=0;
|
| 37 |
+
|
| 38 |
+
GLOBAL_PARAMETER2(int,ModelH_Dump_Freq,"HMM DUMP FREQUENCY","th","dump frequency of HMM",PARLEV_OUTPUT,0);
|
| 39 |
+
|
| 40 |
+
GLOBAL_PARAMETER(short,CompareAlDeps,"emAlignmentDependencies",
|
| 41 |
+
"lextrain: dependencies in the HMM alignment model. "
|
| 42 |
+
" &1: sentence length; &2: previous class; &4: previous position; "
|
| 43 |
+
" &8: French position; &16: French class"
|
| 44 |
+
,PARLEV_MODELS,2);
|
| 45 |
+
GLOBAL_PARAMETER(double,GLOBALProbabilityForEmpty,"emProbForEmpty",
|
| 46 |
+
"f-b-trn: probability for empty word",PARLEV_MODELS,0.4);
|
| 47 |
+
GLOBAL_PARAMETER(short,SmoothHMM,"emSmoothHMM",
|
| 48 |
+
"f-b-trn: smooth HMM model &1: modified counts; &2:perform smoothing with -emAlSmooth",PARLEV_SPECIAL,2);
|
| 49 |
+
GLOBAL_PARAMETER(double,HMMAlignmentModelSmoothFactor,"emAlSmooth",
|
| 50 |
+
"f-b-trn: smoothing factor for HMM alignment model (can be ignored by -emSmoothHMM)",PARLEV_SMOOTH,0.2);
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
/*template<class T>
|
| 54 |
+
void smooth_standard(T*a,T*b,double p)
|
| 55 |
+
{
|
| 56 |
+
int n=b-a;
|
| 57 |
+
if( n==0 )
|
| 58 |
+
return;
|
| 59 |
+
double pp=p/n;
|
| 60 |
+
for(T*i=a;i!=b;++i)
|
| 61 |
+
*i = (1.0-p)*(*i)+pp;
|
| 62 |
+
}*/
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
// Builds the HMM model on top of a copy of the given model2; both jump tables
// (counts and probs) are set up with the global empty-word probability and the
// source/target word classes.
hmm::hmm(model2& m)
  : model2(m),
    counts(GLOBALProbabilityForEmpty, ewordclasses, fwordclasses),
    probs(GLOBALProbabilityForEmpty, ewordclasses, fwordclasses)
{
}
|
| 69 |
+
|
| 70 |
+
// Intentionally a no-op: the argument is ignored and nothing is initialised.
void hmm::initialize_table_uniformly(sentenceHandler&)
{
}
|
| 71 |
+
|
| 72 |
+
int hmm::em_with_tricks(int noIterations)
|
| 73 |
+
{
|
| 74 |
+
double minErrors=1.0;int minIter=0;
|
| 75 |
+
string modelName="Hmm",shortModelName="hmm";
|
| 76 |
+
int dumpFreq=ModelH_Dump_Freq;
|
| 77 |
+
time_t it_st, st, it_fn, fn;
|
| 78 |
+
string tfile, afile,afileh, number, alignfile, test_alignfile;
|
| 79 |
+
int pair_no = 0;
|
| 80 |
+
bool dump_files = false ;
|
| 81 |
+
ofstream of2 ;
|
| 82 |
+
st = time(NULL) ;
|
| 83 |
+
sHandler1.rewind();
|
| 84 |
+
cout << "\n==========================================================\n";
|
| 85 |
+
cout << modelName << " Training Started at: " << ctime(&st);
|
| 86 |
+
for(int it=1; it <= noIterations ; it++){
|
| 87 |
+
pair_no = 0;
|
| 88 |
+
it_st = time(NULL) ;
|
| 89 |
+
cout << endl << "-----------\n" << modelName << ": Iteration " << it << '\n';
|
| 90 |
+
dump_files = (dumpFreq != 0) && ((it % dumpFreq) == 0) && !NODUMPS;
|
| 91 |
+
number = "";
|
| 92 |
+
int n = it;
|
| 93 |
+
do{
|
| 94 |
+
number.insert((size_t)0, 1, (char)(n % 10 + '0'));
|
| 95 |
+
} while((n /= 10) > 0);
|
| 96 |
+
tfile = Prefix + ".t" + shortModelName + "." + number ;
|
| 97 |
+
afile = Prefix + ".a" + shortModelName + "." + number ;
|
| 98 |
+
afileh = Prefix + ".h" + shortModelName + "." + number ;
|
| 99 |
+
alignfile = Prefix + ".A" + shortModelName + "." + number ;
|
| 100 |
+
test_alignfile = Prefix + ".tst.A" + shortModelName + "." + number ;
|
| 101 |
+
counts=HMMTables<int,WordClasses>(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses);
|
| 102 |
+
aCountTable.clear();
|
| 103 |
+
initAL();
|
| 104 |
+
em_loop(perp, sHandler1, dump_files , alignfile.c_str(), trainViterbiPerp, false,it==1,it);
|
| 105 |
+
if( errorsAL()<minErrors )
|
| 106 |
+
{
|
| 107 |
+
minErrors=errorsAL();
|
| 108 |
+
minIter=it;
|
| 109 |
+
}
|
| 110 |
+
if (testPerp && testHandler)
|
| 111 |
+
em_loop(*testPerp, *testHandler, dump_files, test_alignfile.c_str(), *testViterbiPerp, true,it==1,it);
|
| 112 |
+
if (dump_files&&OutputInAachenFormat==1)
|
| 113 |
+
tTable.printCountTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),1);
|
| 114 |
+
tTable.normalizeTable(Elist, Flist);
|
| 115 |
+
aCountTable.normalize(aTable);
|
| 116 |
+
probs=counts;
|
| 117 |
+
cout << modelName << ": ("<<it<<") TRAIN CROSS-ENTROPY " << perp.cross_entropy()
|
| 118 |
+
<< " PERPLEXITY " << perp.perplexity() << '\n';
|
| 119 |
+
if (testPerp && testHandler)
|
| 120 |
+
cout << modelName << ": ("<<it<<") TEST CROSS-ENTROPY " << (*testPerp).cross_entropy()
|
| 121 |
+
<< " PERPLEXITY " << (*testPerp).perplexity()
|
| 122 |
+
<< '\n';
|
| 123 |
+
cout << modelName << ": ("<<it<<") VITERBI TRAIN CROSS-ENTROPY " << trainViterbiPerp.cross_entropy()
|
| 124 |
+
<< " PERPLEXITY " << trainViterbiPerp.perplexity() << '\n';
|
| 125 |
+
if (testPerp && testHandler)
|
| 126 |
+
cout << modelName << ": ("<<it<<") VITERBI TEST CROSS-ENTROPY " << testViterbiPerp->cross_entropy()
|
| 127 |
+
<< " PERPLEXITY " << testViterbiPerp->perplexity()
|
| 128 |
+
<< '\n';
|
| 129 |
+
if (dump_files){
|
| 130 |
+
if( OutputInAachenFormat==0)
|
| 131 |
+
tTable.printProbTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),OutputInAachenFormat);
|
| 132 |
+
ofstream afilestream(afileh.c_str());
|
| 133 |
+
probs.writeJumps(afilestream);
|
| 134 |
+
aCountTable.printTable(afile.c_str());
|
| 135 |
+
}
|
| 136 |
+
it_fn = time(NULL) ;
|
| 137 |
+
cout << "\n" << modelName << " Iteration: " << it<< " took: " <<
|
| 138 |
+
difftime(it_fn, it_st) << " seconds\n";
|
| 139 |
+
} // end of iterations
|
| 140 |
+
fn = time(NULL) ;
|
| 141 |
+
cout << endl << "Entire " << modelName << " Training took: " << difftime(fn, st) << " seconds\n";
|
| 142 |
+
//cout << "tTable contains " << tTable.getHash().bucket_count()
|
| 143 |
+
// << " buckets and " << tTable.getHash().size() << " entries." ;
|
| 144 |
+
cout << "==========================================================\n";
|
| 145 |
+
return minIter;
|
| 146 |
+
}
|
| 147 |
+
|
| 148 |
+
/*template<class T>
|
| 149 |
+
T normalize_if_possible_with_increment(T*a,T*b,int increment)
|
| 150 |
+
{
|
| 151 |
+
T sum=0;
|
| 152 |
+
for(T*i=a;i!=b;i+=increment)
|
| 153 |
+
sum+=*i;
|
| 154 |
+
if( sum )
|
| 155 |
+
for(T*i=a;i!=b;i+=increment)
|
| 156 |
+
*i/=sum;
|
| 157 |
+
else
|
| 158 |
+
{
|
| 159 |
+
T factor=increment/(b-a);
|
| 160 |
+
for(T*i=a;i!=b;i+=increment)
|
| 161 |
+
*i=factor;
|
| 162 |
+
}
|
| 163 |
+
return sum;
|
| 164 |
+
}*/
|
| 165 |
+
|
| 166 |
+
void hmm::load_table(const char* aname){
|
| 167 |
+
cout << "Hmm: loading a table not implemented.\n";
|
| 168 |
+
abort();
|
| 169 |
+
ifstream anamefile(aname);
|
| 170 |
+
probs.readJumps(anamefile);
|
| 171 |
+
}
|
| 172 |
+
|
| 173 |
+
// Builds the HMM network for one sentence pair.
//   es, fs  source / target token ids; index 0 of `es` is the empty word and
//           index 0 of `fs` is not used for emissions.
//   doInit  true for the first HMM iteration: entry weights are set directly
//           and the empty-word transition is taken from al[0] instead of the
//           model's empty-word probability.
// The network has I = 2*l states (l real source positions followed by l
// "empty word" states) and J = m target positions.
// Returns a network allocated with new; the caller owns it.
HMMNetwork *hmm::makeHMMNetwork(const Vector<WordIndex>& es,const Vector<WordIndex>&fs,bool doInit)const
{
  unsigned int i,j;
  unsigned int l = es.size() - 1;
  unsigned int m = fs.size() - 1;
  unsigned int I=2*l,J=m;
  int IJ=I*J;
  bool DependencyOfJ=(CompareAlDeps&(16|8))||(PredictionInAlignments==2);
  bool DependencyOfPrevAJ=(CompareAlDeps&(2|4))||(PredictionInAlignments==0);
  HMMNetwork *net = new HMMNetwork(I,J);
  fill(net->alphainit.begin(),net->alphainit.end(),0.0);
  fill(net->betainit.begin(),net->betainit.end(),0.0);

  // Emission scores: for every target position, the lexical probability of
  // each real source word (states 0..l-1) and of the empty word (states
  // l..2l-1), normalized over all states of that position.
  for(j=1;j<=m;j++)
    {
      for(i=1;i<=l;i++)
        net->n(i-1,j-1)=tTable.getProb(es[i], fs[j]) ;
      double emptyContribution=tTable.getProb(es[0],fs[j]) ;
      for(i=1;i<=l;i++)
        net->n(i+l-1,j-1)=emptyContribution;
      net->finalMultiply*=max(normalize_if_possible_with_increment(&net->n(0,j-1),&net->n(0,j-1)+IJ,J),double(1e-12));
    }

  // One transition matrix per target position when transitions depend on j,
  // otherwise a single shared matrix (none at all if J<=1).
  // NOTE(review): m-1 wraps around for m==0 (unsigned); this appears to rely
  // on empty target sentences never reaching this point - confirm.
  if( DependencyOfJ )
    net->e.resize(m-1);
  else
    net->e.resize(J>1);

  for(j=0;j<net->e.size();j++)
    {
      int frenchClass=fwordclasses.getClass(fs[1+min(int(m)-1,int(j)+1)]);
      net->e[j].resize(I,I,0);
      for(unsigned int i1=0;i1<I;++i1) {
        Array<double> al(l);
        CLASSIFY2(i1,i1real);
        for(unsigned int i2=0;i2<l;i2++)
          al[i2]=probs.getAlProb(i1real,i2,l,m,ewordclasses.getClass(es[1+i1real]),frenchClass
                                 ,j+1);
        normalize_if_possible(conv<double>(al.begin()),conv<double>(al.end()));
        if( SmoothHMM&2 )
          smooth_standard(conv<double>(al.begin()),conv<double>(al.end()),HMMAlignmentModelSmoothFactor);
        for(unsigned int i2=0;i2<I;i2++) {
          CLASSIFY(i2,empty_i2,i2real);
          net->e[j](i1,i2) = al[i2real];

          // Transitions into an empty-word state are only allowed from the
          // matching real position (or its own empty state).
          if( empty_i2 )
            {
              if(i1real!=i2real)
                {
                  net->e[j](i1,i2)=0;
                }
              else
                {
                  net->e[j](i1,i2)=doInit?al[0]:(probs.getProbabilityForEmpty()); // make first HMM iteration like IBM-1
                }
            }
        }
        normalize_if_possible(&net->e[j](i1,0),&net->e[j](i1,0)+I);
      }
    }

  // Entry (alphainit) and exit (betainit) weights.
  if( doInit )
    {
      for(unsigned int i=0;i<I;++i)
        {
          net->alphainit[i]=(i<I/2)?1:(2.0/I);
          net->betainit[i]=1.0;
        }
    }
  else
    {
      if( DependencyOfPrevAJ==0 )
        {
          for(i=0;i<I;i++)
            {
              CLASSIFY2(i,ireal);
              net->alphainit[i]=probs.getAlProb(-1,ireal,l,m,0,fwordclasses.getClass(fs[1+0]),0);
            }
        }
      else
        {
          if( UniformEntryExit&2 )probs.getBetaInit(I,net->betainit);
          if( UniformEntryExit&1 )probs.getAlphaInit(I,net->alphainit);
        }
    }
  massert( net->alphainit.size()==I );massert( net->betainit.size()==I );
  normalize_if_possible(conv<double>(net->alphainit.begin()),conv<double>(net->alphainit.end()));
  normalize_if_possible(conv<double>(net->betainit.begin()),conv<double>(net->betainit.end()));

  // Scale the normalized exit weights by the number of states (2*l).  Written
  // as a plain loop because std::bind1st no longer exists in C++17.
  const double betaScale=2*l;
  for(unsigned int k=0;k<net->betainit.size();++k)
    net->betainit[k]=betaScale*net->betainit[k];
  return net;
}
|
| 259 |
+
extern float MINCOUNTINCREASE;
|
| 260 |
+
|
| 261 |
+
// Run one pass of HMM training (or evaluation) over every sentence pair
// delivered by sHandler1.
//
//   perp            cleared on entry; receives the per-sentence training
//                   cross entropy and is recorded under the label "HMM".
//   sHandler1       sentence source; rewound before and after the pass.
//   dump_alignment  when true (or when FEWDUMPS is set, for sentence numbers
//                   below 1000) the Viterbi alignment is written to alignfile.
//   alignfile       path of the alignment dump file.
//   viterbi_perp    cleared on entry; receives the per-sentence Viterbi score.
//   test            when true no counts are collected.
//   doInit          forwarded to makeHMMNetwork().
//   (unnamed int)   unused.
void hmm::em_loop(Perplexity& perp, sentenceHandler& sHandler1,
                  bool dump_alignment, const char* alignfile, Perplexity& viterbi_perp,
                  bool test,bool doInit,int
                  )
{
  perp.clear();
  viterbi_perp.clear();
  ofstream of2;
  if (dump_alignment||FEWDUMPS )
    {
      of2.open(alignfile);
      // Report an unusable dump file instead of silently dropping all output.
      if( !of2 )
        cerr << "WARNING: can not open alignment file " << alignfile << endl;
    }
  sentPair sent ;
  sHandler1.rewind();
  // for each sentence pair in the corpus
  while(sHandler1.getNextSentence(sent)){
    const Vector<WordIndex>& es = sent.get_eSent();
    const Vector<WordIndex>& fs = sent.get_fSent();
    const float so = sent.getCount();
    // NOTE: the CLASSIFY/CLASSIFY2 macros used below are defined earlier in
    // this file and presumably refer to the local name `l` -- keep the name.
    const WordIndex l = es.size() - 1;
    const WordIndex m = fs.size() - 1;
    Vector<WordIndex> viterbi_alignment(fs.size());

    // The network has 2*l states per position: states >= l are credited to
    // es[0] / alignment position 0 below.
    unsigned int I=2*l,J=m;
    bool DependencyOfJ=(CompareAlDeps&(16|8))||(PredictionInAlignments==2);
    bool DependencyOfPrevAJ=(CompareAlDeps&(2|4))||(PredictionInAlignments==0);
    HMMNetwork *net=makeHMMNetwork(es,fs,doInit);
    Array<double> gamma;
    Array<Array2<double> > epsilon(DependencyOfJ?(m-1):1);
    const double trainProb=ForwardBackwardTraining(*net,gamma,epsilon);
    if( !test )
      {
        // --- lexical / position counts from the state posteriors (gamma) ---
        double *gp=conv<double>(gamma.begin());
        for(unsigned int i2=0;i2<J;i2++)
          for(unsigned int i1=0;i1<I;++i1,++gp)
            if( *gp>MINCOUNTINCREASE )
              {
                COUNT add= *gp*so;
                if( i1>=l )
                  {
                    tTable.incCount(es[0],fs[1+i2],add);
                    aCountTable.getRef(0,i2+1,l,m)+=add;
                  }
                else
                  {
                    tTable.incCount(es[1+i1],fs[1+i2],add);
                    aCountTable.getRef(1+i1,1+i2,l,m)+=add;
                  }
              }

        // --- transition counts from the pairwise posteriors (epsilon) ---
        double p0c=0.0,np0c=0.0;
        for(unsigned int jj=0;jj<epsilon.size();jj++)
          {
            int frenchClass=fwordclasses.getClass(fs[1+min(int(m)-1,int(jj)+1)]);
            double *ep=epsilon[jj].begin();
            if( ep )
              {
                // Every transition posterior is scaled by l before being
                // counted (earlier normalisation variants were disabled in
                // the original code).
                const double mult=l;
                for(WordIndex i=0;i<I;i++)
                  {
                    for(unsigned int i_bef=0;i_bef<I;i_bef++,ep++)
                      {
                        CLASSIFY(i,i_empty,ireal);
                        CLASSIFY2(i_bef,i_befreal);
                        if( i_empty )
                          p0c+=*ep * mult;
                        else
                          {
                            counts.addAlCount(i_befreal,ireal,l,m,ewordclasses.getClass(es[1+i_befreal]),
                                              frenchClass ,jj+1,*ep * mult,0.0);
                            np0c+=*ep * mult;
                          }
                        // ep must walk epsilon[jj] in (i,i_bef) order.
                        massert( &epsilon[jj](i,i_bef)== ep);
                      }
                  }
              }
          }

        // --- entry/exit counts: first and last I entries of gamma ---
        double *gp1=conv<double>(gamma.begin()),*gp2=conv<double>(gamma.end())-I;
        Array<double>&ai=counts.doGetAlphaInit(I);
        Array<double>&bi=counts.doGetBetaInit(I);
        int firstFrenchClass=(fs.size()>1)?(fwordclasses.getClass(fs[1+0])):0;
        for(WordIndex i=0;i<I;i++,gp1++,gp2++)
          {
            CLASSIFY(i,i_empty,ireal);
            ai[i]+= *gp1;
            bi[i]+= *gp2;
            if( DependencyOfPrevAJ==0 )
              {
                if( i_empty )
                  p0c+=*gp1;
                else
                  {
                    counts.addAlCount(-1,ireal,l,m,0,firstFrenchClass,0,*gp1,0.0);
                    np0c+=*gp1;
                  }
              }
          }
        if( Verbose )
          cout << "l: " << l << " m: " << m << " p0c: " << p0c << " np0c: " << np0c << endl;
      }

    // Probabilities are clamped to 1e-100 so that log() never yields -inf.
    const double logFinalMultiply=log(max(net->finalMultiply,1e-100));
    const double cross_entropy=log(max(trainProb,1e-100))+logFinalMultiply;

    Array<int>vit;
    double viterbi_score=1.0;
    if( (HMMTrainingSpecialFlags&1) )
      HMMViterbi(*net,gamma,vit);
    else
      viterbi_score=HMMRealViterbi(*net,vit);
    // Convert 0-based state indices to 1-based word positions; anything
    // beyond l is mapped to position 0.
    for(WordIndex j=1;j<=m;j++)
      {
        viterbi_alignment[j]=vit[j-1]+1;
        if( viterbi_alignment[j]>l)
          viterbi_alignment[j]=0;
      }
    sHandler1.setProbOfSentence(sent,cross_entropy);
    perp.addFactor(cross_entropy, so, l, m,1);
    // Clamp the Viterbi score exactly like trainProb above: an underflowed
    // score of 0 would otherwise add -inf to the Viterbi perplexity.
    viterbi_perp.addFactor(log(max(viterbi_score,1e-100))+logFinalMultiply, so, l, m,1);
    if( Verbose )
      cout << "Viterbi-perp: " << log(viterbi_score) << ' ' << log(max(net->finalMultiply,1e-100)) << ' ' << viterbi_score << ' ' << net->finalMultiply << ' ' << *net << "gamma: " << gamma << endl;
    delete net;net=0;
    if (dump_alignment||(FEWDUMPS&&sent.getSentenceNo()<1000) )
      printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(), of2, viterbi_alignment, sent.getSentenceNo(), viterbi_score);
    addAL(viterbi_alignment,sent.getSentenceNo(),l);
  } /* of while */
  sHandler1.rewind();
  perp.record("HMM");
  viterbi_perp.record("HMM");
  errorReportAL(cout,"HMM");
}
|
| 402 |
+
|
| 403 |
+
#include "HMMTables.cpp"
|
| 404 |
+
// Explicit instantiation of HMMTables for <int,WordClasses>, the type used by
// the hmm class for its `counts` and `probs` members. Presumably relies on the
// template definitions pulled in by the #include of HMMTables.cpp just above.
template class HMMTables<int,WordClasses>;
|
| 405 |
+
|
tools/giza-pp/GIZA++-v2/hmm.h
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
EGYPT Toolkit for Statistical Machine Translation
|
| 4 |
+
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
| 5 |
+
|
| 6 |
+
This program is free software; you can redistribute it and/or
|
| 7 |
+
modify it under the terms of the GNU General Public License
|
| 8 |
+
as published by the Free Software Foundation; either version 2
|
| 9 |
+
of the License, or (at your option) any later version.
|
| 10 |
+
|
| 11 |
+
This program is distributed in the hope that it will be useful,
|
| 12 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 13 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 14 |
+
GNU General Public License for more details.
|
| 15 |
+
|
| 16 |
+
You should have received a copy of the GNU General Public License
|
| 17 |
+
along with this program; if not, write to the Free Software
|
| 18 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 19 |
+
USA.
|
| 20 |
+
|
| 21 |
+
*/
|
| 22 |
+
#ifndef _hmm_h
|
| 23 |
+
#define _hmm_h 1
|
| 24 |
+
|
| 25 |
+
#include <cassert>
|
| 26 |
+
|
| 27 |
+
#include <iostream>
|
| 28 |
+
#include <algorithm>
|
| 29 |
+
#include <functional>
|
| 30 |
+
#include <map>
|
| 31 |
+
#include <set>
|
| 32 |
+
#include "Vector.h"
|
| 33 |
+
#include <utility>
|
| 34 |
+
|
| 35 |
+
#include <fstream>
|
| 36 |
+
#include <cmath>
|
| 37 |
+
#include <ctime>
|
| 38 |
+
|
| 39 |
+
#include "TTables.h"
|
| 40 |
+
#include "ATables.h"
|
| 41 |
+
#include "getSentence.h"
|
| 42 |
+
#include "defs.h"
|
| 43 |
+
#include "model2.h"
|
| 44 |
+
#include "Perplexity.h"
|
| 45 |
+
#include "vocab.h"
|
| 46 |
+
#include "WordClasses.h"
|
| 47 |
+
#include "HMMTables.h"
|
| 48 |
+
#include "ForwardBackward.h"
|
| 49 |
+
|
| 50 |
+
class hmm : public model2
|
| 51 |
+
{
|
| 52 |
+
private:
|
| 53 |
+
WordClasses ewordclasses;
|
| 54 |
+
WordClasses fwordclasses;
|
| 55 |
+
HMMTables<int,WordClasses> counts,probs;
|
| 56 |
+
public:
|
| 57 |
+
template<class MAPPER>
|
| 58 |
+
void makeWordClasses(const MAPPER&m1,const MAPPER&m2,string efile,string ffile)
|
| 59 |
+
{
|
| 60 |
+
ifstream estrm(efile.c_str()),fstrm(ffile.c_str());
|
| 61 |
+
if( !estrm )
|
| 62 |
+
{
|
| 63 |
+
cerr << "ERROR: can not read " << efile << endl;
|
| 64 |
+
}
|
| 65 |
+
else
|
| 66 |
+
ewordclasses.read(estrm,m1);
|
| 67 |
+
if( !fstrm )
|
| 68 |
+
cerr << "ERROR: can not read " << ffile << endl;
|
| 69 |
+
else
|
| 70 |
+
fwordclasses.read(fstrm,m2);
|
| 71 |
+
}
|
| 72 |
+
hmm(model2&m2);
|
| 73 |
+
void initialize_table_uniformly(sentenceHandler&);
|
| 74 |
+
int em_with_tricks(int);
|
| 75 |
+
void load_table(const char* aname);
|
| 76 |
+
void em_loop(Perplexity& perp, sentenceHandler& sHandler1, bool dump_files,
|
| 77 |
+
const char* alignfile, Perplexity&, bool test,bool doInit,int iter);
|
| 78 |
+
HMMNetwork *makeHMMNetwork(const Vector<WordIndex>& es,const Vector<WordIndex>&fs,bool doInit)const;
|
| 79 |
+
friend class model3;
|
| 80 |
+
};
|
| 81 |
+
|
| 82 |
+
#endif
|
tools/giza-pp/GIZA++-v2/logprob.cpp
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
|
| 3 |
+
EGYPT Toolkit for Statistical Machine Translation
|
| 4 |
+
Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
|
| 5 |
+
|
| 6 |
+
This program is free software; you can redistribute it and/or
|
| 7 |
+
modify it under the terms of the GNU General Public License
|
| 8 |
+
as published by the Free Software Foundation; either version 2
|
| 9 |
+
of the License, or (at your option) any later version.
|
| 10 |
+
|
| 11 |
+
This program is distributed in the hope that it will be useful,
|
| 12 |
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 13 |
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 14 |
+
GNU General Public License for more details.
|
| 15 |
+
|
| 16 |
+
You should have received a copy of the GNU General Public License
|
| 17 |
+
along with this program; if not, write to the Free Software
|
| 18 |
+
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
| 19 |
+
USA.
|
| 20 |
+
|
| 21 |
+
*/
|
| 22 |
+
|
| 23 |
+
// Routines to perform integer exponential arithmetic.
|
| 24 |
+
// A number x is represented as n, where x = b**n.
|
| 25 |
+
// It is assumed that b > 1, something like b = 1.001;
|
| 26 |
+
|
| 27 |
+
#include "logprob.h"
|
| 28 |
+
#include <stdlib.h>
|
| 29 |
+
#include <stdio.h>
|
| 30 |
+
#include <iostream>
|
| 31 |
+
#include <fstream>
|
| 32 |
+
#include <string>
|
| 33 |
+
double *LogProb::ntof = NULL; // Tables will be initialized
|
| 34 |
+
int *LogProb::addtbl = NULL; // in Initialize function.
|
| 35 |
+
int *LogProb::subtbl = NULL; //
|
| 36 |
+
|
| 37 |
+
const int LogProb::max_2byte_integer = 32767;
|
| 38 |
+
const int LogProb::min_2byte_integer = -32768;
|
| 39 |
+
const double LogProb::b = 1.001; // a logarithm basis
|
| 40 |
+
const double LogProb::logb2 = log(b);
|
| 41 |
+
//const int LogProb::nmax = round(78.0E0 * log(1.0E1) / logb2);
|
| 42 |
+
const int LogProb::nmax = round(300.0E0 * log(1.0E1) / logb2);
|
| 43 |
+
const int LogProb::nmin = -nmax;
|
| 44 |
+
const int LogProb::tblbnd = round(log((b-1.0E0)/2.0E0)/logb2);
|
| 45 |
+
const int LogProb::zeron = round(pow(-2, 23));
|
| 46 |
+
const int LogProb::onen = 0;
|
| 47 |
+
const int LogProb::infn = onen - zeron;
|
| 48 |
+
|
| 49 |
+
const int LogProb::initialized = LogProb::Initialize();
|
| 50 |
+
const LogProb LogProb::zero(0);
|
| 51 |
+
const LogProb LogProb::one(1);
|
| 52 |
+
const LogProb LogProb::minus2(1e-2);
|
| 53 |
+
const LogProb LogProb::minus4(1e-4);
|
| 54 |
+
const LogProb LogProb::minus6(1e-6);
|
| 55 |
+
const LogProb LogProb::minus8(1e-8);
|
| 56 |
+
const LogProb LogProb::minus10(1e-10);
|
| 57 |
+
const LogProb LogProb::minus12(1e-12);
|
| 58 |
+
const LogProb LogProb::minus14(1e-14);
|
| 59 |
+
const LogProb LogProb::minus16(1e-16);
|
| 60 |
+
|
| 61 |
+
// static table initialization function
|
| 62 |
+
int LogProb::Initialize()
|
| 63 |
+
{
|
| 64 |
+
int nbytes = sizeof(double)*(nmax-nmin+1) + sizeof(int)*(0-tblbnd+1);
|
| 65 |
+
std::cerr << nbytes << " bytes used for LogProb tables (C++ version)\n";
|
| 66 |
+
ntof = new double[nmax-nmin+1];
|
| 67 |
+
addtbl = new int[-tblbnd+1];
|
| 68 |
+
subtbl = new int[-tblbnd+1];
|
| 69 |
+
|
| 70 |
+
// char filename[257];
|
| 71 |
+
// string filename ;
|
| 72 |
+
// ifstream ifs;
|
| 73 |
+
// ifs.open(filename.c_str());
|
| 74 |
+
// if (!ifs)
|
| 75 |
+
// {
|
| 76 |
+
int i;
|
| 77 |
+
std::cerr << "Building integer logs conversion tables\n";
|
| 78 |
+
ntof[0] = 0 ;
|
| 79 |
+
|
| 80 |
+
for (i=nmin+1; i<=nmax; ++i)
|
| 81 |
+
{
|
| 82 |
+
double x = i;
|
| 83 |
+
ntof[i-nmin] = exp(x*logb2);
|
| 84 |
+
|
| 85 |
+
}
|
| 86 |
+
for (i=tblbnd; i<=0; ++i)
|
| 87 |
+
{
|
| 88 |
+
double x = 1.0 + pow(b, i);
|
| 89 |
+
addtbl[i-tblbnd] = round(log(x)/logb2);
|
| 90 |
+
}
|
| 91 |
+
double sqrtb = exp(0.5*logb2);
|
| 92 |
+
for (i=0; i<=-tblbnd; ++i)
|
| 93 |
+
{
|
| 94 |
+
double x = sqrtb * pow(b, i) - 1.0;
|
| 95 |
+
subtbl[i] = round(log(x)/logb2);
|
| 96 |
+
}
|
| 97 |
+
// if (toolsRoot)
|
| 98 |
+
// {
|
| 99 |
+
// ofstream ofs(filename.c_str());
|
| 100 |
+
// if (!ofs)
|
| 101 |
+
// cerr << "Could not write LogProb data to " << filename << endl;
|
| 102 |
+
// else
|
| 103 |
+
// {
|
| 104 |
+
// ofs.write((const char *)ntof, sizeof(double) * (nmax-nmin+1));
|
| 105 |
+
// ofs.write((const char *)addtbl, sizeof(int) * (-tblbnd+1));
|
| 106 |
+
// ofs.write((const char *)subtbl, sizeof(int) * (-tblbnd+1));
|
| 107 |
+
// }
|
| 108 |
+
// }
|
| 109 |
+
// }
|
| 110 |
+
// else
|
| 111 |
+
// {
|
| 112 |
+
// ifs.read((char *)ntof, sizeof(double) * (nmax - nmin + 1));
|
| 113 |
+
// ifs.read((char *)addtbl, sizeof(int) * (-tblbnd+1));
|
| 114 |
+
// ifs.read((char *)subtbl, sizeof(int) * (-tblbnd+1));
|
| 115 |
+
// }
|
| 116 |
+
return 1;
|
| 117 |
+
}
|
| 118 |
+
|
| 119 |
+
void LogProb::FreeTables()
|
| 120 |
+
{
|
| 121 |
+
delete [] addtbl;
|
| 122 |
+
delete [] subtbl;
|
| 123 |
+
delete [] ntof;
|
| 124 |
+
}
|
| 125 |
+
|
| 126 |
+
//---------------------------------------------------------------------------
|
| 127 |
+
// Arithmetic operators
|
| 128 |
+
//---------------------------------------------------------------------------
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
// Subtract two logarithm numbers. Use the following method:
|
| 132 |
+
// b**n - b**m = b**m( b**(n-m) - 1 ), assuming n >= m.
|
| 133 |
+
LogProb& LogProb::operator-=(const LogProb &subs)
|
| 134 |
+
{
|
| 135 |
+
if (subs.logr == zeron)
|
| 136 |
+
return *this;
|
| 137 |
+
int a = logr - subs.logr;
|
| 138 |
+
if (a <= 0)
|
| 139 |
+
{
|
| 140 |
+
if (a < 0)
|
| 141 |
+
{
|
| 142 |
+
std::cerr << "WARNING(logprob): Invalid arguments to nsub" <<(*this)<< " " << subs << std::endl;
|
| 143 |
+
//abort();
|
| 144 |
+
}
|
| 145 |
+
logr = zeron;
|
| 146 |
+
return *this;
|
| 147 |
+
}
|
| 148 |
+
if (a > -tblbnd)
|
| 149 |
+
return *this;
|
| 150 |
+
logr = subs.logr + subtbl[a];
|
| 151 |
+
return *this;
|
| 152 |
+
}
|
| 153 |
+
|
| 154 |
+
|