|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
use warnings;
use strict;
use FindBin qw($RealBin);
use Getopt::Long;
use File::Path qw(make_path remove_tree);

# Builds a language model by running three IRSTLM tools from --irst-dir in
# sequence:
#   1. add-start-end.sh  reads the corpus and writes <temp>/setagged
#   2. ngt               reads setagged and writes <temp>/counts
#   3. tlm               reads counts and writes the file named by --lm
# Intermediate files live in a per-process directory under --temp-dir that is
# removed before the script ends, whether or not the build succeeded.
#
# Required options: --text, --lm, --irst-dir.
# Exits non-zero (via die) as soon as any step fails.

# ---------------------------------------------------------------------------
# Settings (defaults may be overridden on the command line)
# ---------------------------------------------------------------------------
my $order = 3;               # passed to ngt and tlm as -n
my $corpusPath;              # --text (required): input corpus
my $lmPath;                  # --lm (required): passed to tlm as -o
my $cores = 2;               # --cores: parsed but not used by this script
my $irstPath;                # --irst-dir (required): directory holding the tools
my $tempPath = "tmp";        # --temp-dir: parent of the per-process work dir
my $pruneSingletons = 1;     # -p: when 0, " -ps=no" is appended to the tlm call
my $smoothing = "msb";       # -s: passed to tlm as -lm
my $dummy;                   # sink for flags that are accepted and then ignored

# order is declared as an integer so junk is rejected by GetOptions instead of
# being handed to the tools as -n=<junk>.
# --interpolate / --kndiscount are parsed and discarded.
# NOTE(review): presumably kept so callers written for another LM trainer's
# command line keep working - confirm.
GetOptions("order=i" => \$order,
           "text=s" => \$corpusPath,
           "lm=s" => \$lmPath,
           "cores=s" => \$cores,
           "irst-dir=s" => \$irstPath,
           "temp-dir=s" => \$tempPath,
           "p=i" => \$pruneSingletons,
           "s=s" => \$smoothing,
           "interpolate!" => \$dummy,
           "kndiscount!" => \$dummy
    ) or exit 1;

die("ERROR: please set text") unless defined($corpusPath);
die("ERROR: please set lm") unless defined($lmPath);
die("ERROR: please set irst-dir") unless defined($irstPath);
die("ERROR: order must be a positive integer") unless $order > 0;

# Quote one word for /bin/sh: wrap it in single quotes and splice any embedded
# single quote in as '\''. Keeps paths with spaces or metacharacters intact.
sub shell_quote {
    my ($word) = @_;
    $word =~ s/'/'\\''/g;
    return "'$word'";
}

# Log a shell command to STDERR, run it, and die unless it exits with status 0.
# The command's stdout is captured and dropped.
sub run_step {
    my ($cmd) = @_;
    print STDERR "EXECUTING $cmd\n";
    my $discarded = `$cmd`;
    if ($? == -1) {
        die "ERROR: could not start command ($!): $cmd\n";
    }
    if ($? & 127) {
        die sprintf("ERROR: command died with signal %d: %s\n", $? & 127, $cmd);
    }
    if ($? >> 8) {
        die sprintf("ERROR: command exited with status %d: %s\n", $? >> 8, $cmd);
    }
    return;
}

# ---------------------------------------------------------------------------
# Per-process working directory ($$ is the process id, so concurrent runs that
# share --temp-dir do not collide). make_path croaks if it cannot create it.
# ---------------------------------------------------------------------------
$tempPath .= "/irstlm-build-tmp.$$";
make_path($tempPath);

my $tagged = "$tempPath/setagged";
my $counts = "$tempPath/counts";

# Run the three steps inside eval so the work directory is removed even when
# one of them fails; the error is re-thrown after cleanup.
my $built = eval {
    # Step 1: feed the corpus through add-start-end.sh.
    run_step("cat " . shell_quote($corpusPath)
             . " | " . shell_quote("$irstPath/add-start-end.sh")
             . " > " . shell_quote($tagged));

    # A pipeline reports only its last command's status, so a failing cat
    # (for example an unreadable corpus) is invisible to run_step. An empty
    # result file catches that case.
    die "ERROR: no tagged text was produced from $corpusPath\n"
        unless -s $tagged;

    # Step 2: ngt turns the tagged text into the counts file.
    run_step(join(" ",
                  shell_quote("$irstPath/ngt"),
                  "-i=" . shell_quote($tagged),
                  "-n=$order",
                  "-b=yes",
                  "-o=" . shell_quote($counts)));

    # Step 3: tlm writes the final model to $lmPath.
    my $cmd = join(" ",
                   shell_quote("$irstPath/tlm"),
                   "-o=" . shell_quote($lmPath),
                   "-lm=" . shell_quote($smoothing),
                   "-bo=yes",
                   "-n=$order",
                   "-tr=" . shell_quote($counts));
    $cmd .= " -ps=no" unless $pruneSingletons;
    run_step($cmd);

    1;
};
my $failure = $@;

# ---------------------------------------------------------------------------
# Cleanup, then report the outcome
# ---------------------------------------------------------------------------
print STDERR "REMOVING $tempPath\n";
remove_tree($tempPath);

die($failure || "ERROR: language model build failed\n") unless $built;

print STDERR "FINISH.\n";
|
|
|