#! /usr/bin/perl -w

# Script to take the output of nroff -man and remove all the backspacing and
# the page footers and the screen commands etc so that it is more usefully
# readable online. In fact, in the latest nroff, intermediate footers don't
# seem to be generated any more.
#
# Input on STDIN; output to STDOUT.

use strict;
use warnings;

# A page footer is a single non-blank line with at least this many blank
# lines before it and exactly this many blank lines read after it.
use constant FOOTER_BLANKS => 3;

# Return a copy of the given line with terminal markup removed:
#   - SGR screen controls "ESC [ params m", where params is any run of digits
#     and semicolons (so "ESC[1m", "ESC[1;4m" and "ESC[m" are all removed);
#   - overstrike pairs "char, backspace".
sub strip_terminal_markup {
    my ($text) = @_;
    $text =~ s/\x1b\[[\d;]*m//g;
    $text =~ s/.\x8//g;
    return $text;
}

# Read nroff -man output from the filehandle $in and write the cleaned text
# to the filehandle $out.
#
# Only the first header line is retained; page footers are cut out; runs of
# blank lines are collapsed as described in the comments below.
sub clean_nroff_stream {
    my ($in, $out) = @_;

    my $blankcount  = 0;    # Blank lines seen since the last output, not yet printed
    my $lastwascut  = 0;    # True when the previous non-blank line was a cut footer
    my $firstheader = 1;    # True until the first header line has been printed
    my $lastprinted;        # Line used as the indentation reference after a cut

    while (my $line = <$in>) {
        $line = strip_terminal_markup($line);

        # Handle header lines. Retain only the first one we encounter. Any
        # others (e.g. at end of document) are dropped. In both cases the
        # line that follows the header (expected to be blank) is discarded
        # without being examined.
        if ($line =~ /^PCRE(\w*)\(([13])\)\s+PCRE\1\(\2\)$/) {
            if ($firstheader) {
                $firstheader = 0;
                print {$out} $line;
                $lastprinted = $line;
                $lastwascut  = 0;
            }
            my $following = <$in>;    # Read and throw away
            next;
        }

        # Count runs of empty lines; they are printed (or not) later, once
        # we know what follows them.
        if ($line =~ /^\s*$/) {
            $blankcount++;
            $lastwascut = 0;
            next;
        }

        # If a chunk of lines has been cut out (page footer) and the next
        # line has a different indentation, put back one blank line.
        if ($lastwascut && $blankcount < 1 && defined $lastprinted) {
            my ($previous_indent) = $lastprinted =~ /^(\s*)/;
            my ($current_indent)  = $line        =~ /^(\s*)/;
            $blankcount++ if $previous_indent ne $current_indent;
        }

        # We get here only when we have a non-blank line in hand. If it was
        # preceded by 3 or more blank lines, read the next 3 lines and see if
        # they are blank. At end of input the missing lines count as blank.
        if ($blankcount >= FOOTER_BLANKS) {
            my @lookahead;
            for my $i (1 .. FOOTER_BLANKS) {
                my $next = <$in>;
                $next = "" if !defined $next;
                push @lookahead, strip_terminal_markup($next);
            }

            my $nonblank = grep { $_ !~ /^\s*$/ } @lookahead;

            # Cut out chunks of the form <3 blanks><non-blank><3 blanks>:
            # drop the line in hand, the look-ahead lines and three of the
            # saved blanks, and remember that we have just done a cut.
            if ($nonblank == 0) {
                $blankcount -= FOOTER_BLANKS;
                $lastwascut = 1;
            }

            # Otherwise output the saved blanks, the current line, and the
            # look-ahead lines. The line in hand (not the look-ahead lines)
            # is what gets remembered as the indentation reference.
            else {
                print {$out} join '', "\n" x $blankcount, $line, @lookahead;
                $lastprinted = $line;
                $lastwascut  = 0;
                $blankcount  = 0;
            }
        }

        # This non-blank line is not preceded by 3 or more blank lines.
        # Output any blanks there are, and the line. Remember it. Force two
        # blank lines before headings (lines starting in column one), except
        # for the very first output line and the trailer lines.
        else {
            $blankcount = 2
                if $line =~ /^\S/
                && $line !~ /^Last updated/
                && $line !~ /^Copyright/
                && defined $lastprinted;
            print {$out} join '', "\n" x $blankcount, $line;
            $lastprinted = $line;
            $lastwascut  = 0;
            $blankcount  = 0;
        }
    }

    return;
}

# Act as a STDIN-to-STDOUT filter when executed directly; do nothing when the
# file is loaded by another program that wants to call clean_nroff_stream().
clean_nroff_stream(\*STDIN, \*STDOUT) unless caller;

1;

# End