| #! /usr/bin/perl | |
| # This is a script for checking whether a file contains any carriage return | |
| # characters, and whether it is valid UTF-8. | |
| use Encode; | |
# State shared between checktxt() and the main program. These stay package
# variables because the main program sets the option flags and reads $yield.

$yield = 0;    # exit status: becomes 1 as soon as any check reports a problem
$ascii = 0;    # bool: when true, also report characters outside \x01-\x7e
$crlf  = 0;    # bool: when true, carriage returns are NOT reported

# This subroutine does the work for one file. The file is read in one go as
# raw bytes and decoded as UTF-8. If decoding fails, "Bad UTF-8" is reported
# and the remaining checks are skipped. Otherwise the decoded text is checked
# for carriage returns (unless $crlf is set) and, when $ascii is set, for any
# character outside the range \x01-\x7e. Every report goes to stdout and sets
# $yield to 1.
#
# Argument:  the name of the file to check
# Returns:   nothing useful; results are communicated through $yield
# Dies:      if the file cannot be opened

sub checktxt {
    my ($file) = @_;

    # A lexical handle and a lexical buffer, so nothing is left behind in
    # package variables between calls.
    open(my $in, "<:raw", $file) or die "Can't open $file for input: $!";
    my $bin = do { local $/ = undef; <$in> };    # slurp the whole file
    close($in);

    # FB_CROAK makes decode() die on malformed input; the trailing 1 lets the
    # eval's own value tell success from failure.
    my $data;
    my $decoded_ok = eval {
        $data = Encode::decode("UTF-8", $bin, Encode::FB_CROAK);
        1;
    };

    # The reports below use print rather than printf: the file name is data,
    # and must not be interpreted as a format string if it contains '%'.
    if (!$decoded_ok) {
        print "Bad UTF-8 in $file\n";
        $yield = 1;
        return;
    }

    if (!$crlf && index($data, "\r") != -1) {
        print "CR in $file\n";
        $yield = 1;
    }

    if ($ascii && $data =~ /[^\x01-\x7e]/) {
        print "Non-ASCII in $file\n";
        $yield = 1;
    }

    return;
}
# This is the main program. The command line is processed strictly from left
# to right: "-ascii" and "-crlf" switch on the corresponding flag, and every
# other argument is taken as a file name and checked straight away. A flag
# therefore only affects the files named after it. The exit status is $yield.

$, = "";    # Output field separator

foreach my $arg (@ARGV)
{
    if    ($arg eq "-ascii") { $ascii = 1; }
    elsif ($arg eq "-crlf")  { $crlf = 1; }
    else                     { checktxt($arg); }
}

exit $yield;
| # End | |