File size: 763 Bytes
fd49381
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#!/usr/bin/env perl
#
# This file is part of moses.  Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.

use warnings;
use strict;

die("ERROR syntax: input-from-sgm.perl < in.sgm > in.txt")
    unless scalar @ARGV == 0;

extract_segments(\*STDIN, \*STDOUT);

# Copy the text content of every <seg ...>...</seg> element read from $in
# to $out, one segment per output line.
#
# Parameters:
#   $in  - readable filehandle providing the SGML input
#   $out - writable filehandle receiving the plain-text output
#
# A segment may span several input lines; continuation lines are appended
# directly (no separator is inserted between them).  Within the extracted
# text, runs of whitespace are collapsed to a single space and leading and
# trailing spaces are removed.  A <seg> that is still unterminated when the
# input ends produces no output.
sub extract_segments {
    my ($in, $out) = @_;

    while (my $line = <$in>) {
        # chomp rather than chop: a final line without a trailing newline
        # must keep its last character, otherwise the '>' of '</seg>' is
        # lost and the last segment is never printed.
        chomp($line);

        # The opening tag is the last thing on the line: the content starts
        # on a following line, so pull lines in until something follows it.
        while ($line =~ /<seg[^>]+>\s*$/i) {
            last unless _append_next_line($in, \$line);
        }

        # The segment has been opened but not closed yet: keep appending
        # lines until the closing tag shows up or the input runs out.
        while ($line =~ /<seg[^>]+>\s*(.*)\s*$/i &&
               $line !~ /<seg[^>]+>\s*(.*)\s*<\/seg>/i) {
            last unless _append_next_line($in, \$line);
        }

        if ($line =~ /<seg[^>]+>\s*(.*)\s*<\/seg>/i) {
            my $input = $1;
            $input =~ s/\s+/ /g;
            $input =~ s/^ //g;
            $input =~ s/ $//g;
            print {$out} $input."\n";
        }
    }
    return;
}

# Read one more line from $in and append it, minus its line terminator, to
# the string referenced by $line_ref.  Returns 1 on success and 0 at end of
# input, so callers can leave their loop instead of appending undef forever.
sub _append_next_line {
    my ($in, $line_ref) = @_;

    my $next_line = <$in>;
    return 0 unless defined $next_line;

    chomp($next_line);
    ${$line_ref} .= $next_line;
    return 1;
}