#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
#
# Filter: reads text from STDIN line by line, normalizes whitespace, replaces
# the characters that are special to moses with entities, and prints the
# result to STDOUT.
#
# Usage: escape-special-chars.perl [-b] < in > out
#   -b   do not buffer output (flush after each line)

use warnings;
use strict;

# Characters that are special in moses, and the entity that replaces each.
my %ENTITY_FOR = (
    '&'  => '&amp;',     # escape escape
    '|'  => '&#124;',    # factor separator
    '<'  => '&lt;',      # xml
    '>'  => '&gt;',      # xml
    q{'} => '&apos;',    # xml
    '"'  => '&quot;',    # xml
    '['  => '&#91;',     # syntax non-terminal
    ']'  => '&#93;',     # syntax non-terminal
);

# Escape one line of text.
#
# Takes a string (with or without a trailing newline) and returns the
# escaped string without a trailing newline:
#   1. control characters \000-\037 are deleted (this includes tab, CR, LF);
#   2. runs of whitespace collapse to one space; a leading and a trailing
#      space are dropped;
#   3. the characters listed in %ENTITY_FOR are replaced by their entities;
#   4. spans of the form  <tag translation="..."> ... </tag>  are turned back
#      into real markup; their attribute value and content stay escaped.
sub escape_special_chars {
    my ($line) = @_;

    # avoid general madness
    $line =~ s/[\000-\037]//g;
    $line =~ s/\s+/ /g;
    $line =~ s/^ //;
    $line =~ s/ $//;

    # special characters in moses
    # One pass is equivalent to escaping '&' first and the rest afterwards:
    # no replacement text contains a character that is itself escaped later.
    $line =~ s/([&|<>'"\[\]])/$ENTITY_FOR{$1}/g;

    # restore xml instructions
    # After the step above the markup is in escaped form, so that is what the
    # pattern has to look for.
    $line =~ s{&lt;(\S+) translation=&quot;(.+?)&quot;&gt; (.+?) &lt;/(\S+)&gt;}
              {<$1 translation="$2"> $3 </$4>}g;

    return $line;
}

# Script entry point: consume the command line, then filter STDIN to STDOUT.
sub main {
    while (defined(my $arg = shift @ARGV)) {
        $| = 1 if $arg =~ /^-b$/;    # not buffered (flush each line)
    }

    while (my $line = <STDIN>) {
        # chomp, not chop: a last line without a newline must keep its
        # final character.
        chomp $line;
        print escape_special_chars($line), "\n";
    }

    return;
}

# Run only when executed as a script, so the file can also be loaded just
# for escape_special_chars().
main() unless caller;

1;