perl script

I have the impression there is a a very small but nonzero demand for a
tool to aid in composing a regex to match a block of IP addresses.  It
was a fun little exercise.  I hope it will help someone, either to use
directly or in understanding the nuances in writing their own regexes.

Here's an example of its use:

  % iprangex.pl 166.84.1-127.0-255
  166\.
  0?84\.
  (0?0?[1-9]|0?[1-9][0-9]|1[01][0-9]|12[0-7])\.
  ([01]?[0-9]?[0-9]|2[0-4][0-9]|25[0-5])

The line-breaks are only for the purposes of this message, and are not
a normal feature of the output.  Looking at the last subexpression, it
is clear that a rigorous RE for even as simple a range as 0-255 is not
as pristine as one might have expected.  That said, play with it, have
some fun.  Comments and suggestions welcome, script is included below.

Mike

-- 
--       Con    In                                       Hanc marginis
    --   Tact   Formation                                    exiguitas
    --                                                     non caperet.


#!/usr/local/bin/perl -w
#------------------------------------------------------------------------------
# iprangex.pl
#       Compose a regular expression to match a dotted decimal IP range
#       for the four octets in an IP address.
#------------------------------------------------------------------------------

BEGIN
{
    require 5.005
}

use integer;                            # no floats here
use strict;                             # no fudges here (yeah, right)
use diagnostics;                        # verbose runtime errors
  # English;                            # enable $ARG for $_
use Getopt::Long;                       # try not to reinvent the wheel

use vars qw($opt_help $opt_debug);      # globals

my @args = Opt();                       # command line options

#------------------------------------------------------------------------------

for (@args)                             # command line arguments
{
    my @ip = split /\./, shift;         # split each spec into 4 ranges
    my @re = ();

    @ip == 4 or Err("Not four octet ranges");
    print "\n" if $opt_debug;

    for (@ip)
    {
        my @r = split /-/;              # split each range into (begin, end)

        @r == 1 and $r[1] = $r[0];      # convert a singleton to a range
        @r == 2  or Err("Bad syntax in octet range");

        push @re, Gee($r[0], $r[1]);    # process one range
    }

    print "\n" if $opt_debug;
    print join("\\.", @re), "\n";
}

#------------------------------------------------------------------------------
exit 0;                                 # end of main
#------------------------------------------------------------------------------

sub MAX() { 256 }                       # for IP addresses (at most 3 digits)

#------------------------------------------------------------------------------

sub Gee($$)                             # process one pre-parsed octet range
{
    my ($first, $final) = @_;           # inclusive range spec

    0 <= $first && $first <= $final && $final < MAX
        or Err("Invalid range specified");  # bounce check

    my @range = ($first, $final + 1);   # we make intervals obey L <= x < R
    my $point = 0;

    splice(@range, -1, 0, $point) while $point = Cut(\(_at_)range);

    my $regex = Run(\(_at_)range);           # spit out the corresponding string

return $regex unless $opt_debug;        # end of non-debug code

    $range[(_at_)range] = MAX + 1;           # dummy value for the final 
pseudo-cut

    for (my $i = 0; $i < @range - 2; $i++)  # validate all cuts (triples)
    {
        my $b = $range[$i + 0];         # points must be distinct and in order
        my $c = $range[$i + 1];         # and only 1-digit transitions allowed
        my $e = $range[$i + 2];

        Let($b, $c, $e) or warn "\n\nBOGOSITY DETECTED!  ($b-$c-$e)\n\n";
    }

return $regex
}

#------------------------------------------------------------------------------

sub Cut(\$)                             # break range into 1-digit transitions
{                                       # only splits last interval in range
    my $a = shift;                      # (effectively a recursive algorithm)

return 0 unless @$a >= 2;               # size check (an assert, more or less)

    my $b = $a->[-2];                   # begin value of the last interval
    my $e = $a->[-1];                   # end value of that interval
    my $f = $e - 1;                     # final value that's actually in-range

    Let($b, $e, MAX + 1)                and return 0; # maybe nothing to do

    my @b = Dig($b);                    # break into ones, tens, hundreds
    my @f = Dig($f);

    my $t = $b - $b[0] + 10;            # push left end up to multiple of 10
    my $h = $b - $b[1] + 100;           # and then up to a multiple of 100

    $b[0]           && Let($b, $t, $e)  and return $t; # ones normalized
    $b[1]           && Let($b, $h, $e)  and return $h; # tens normalized

    $h = $f[2];                         # lop off excess hundreds
    $t = $f - $f[0];                    # lop off excess tens

    $f[2] > $b[2]   && Let($b, $h, $e)  and return $h; # centuries skipped
    $f[1] > $b[1]   && Let($b, $t, $e)  and return $t; # decades   skipped

return 0                                # only excess ones left now, we hope
}

#------------------------------------------------------------------------------

sub Let($$$)                            # check that a cut passes muster
{
    my ($b, $c, $e) = @_;               # begin, cut (the candidate), end
    my $f = $c - 1;                     # 0 to 100 means values 0 to 99

return 0 unless $b < $c && $c < $e;     # must be distinct and in order

    my @b = Dig($b);                    # break into ones, tens, hundreds
    my @f = Dig($f);

    # Require b to f be only a 1-digit transition.  One digit can change,
    # higher digits must not, and lower digits must be [0-9].

return $b[0] <= $f[0] && Equ([ @b[1..2]            ], [ @f[1..2]    ])
    || $b[1] <  $f[1] && Equ([ $b[2], $b[0], $f[0] ], [ $f[2], 0, 9 ])
    || $b[2] <  $f[2] && Equ([ @b[0..1], @f[0..1]  ], [ 0, 0, 9, 90 ])
}                                       # note, tens are 2 digits, hence 90

#------------------------------------------------------------------------------

sub Run(\$)                             # compose a regex for the whole range
{
    my $a = shift;                      # the range, broken into subintervals

return 0 unless @$a >= 2;               # size check (an assert, more or less)

    print join(", ", @$a), "\n" if $opt_debug;

    my $b = $a->[0];                    # begin, end pair for each subinterval
    my $e = MAX + 1;
    my @r = ();                         # result strings

    $#r = @$a - 2;                      # preallocate the (up to 5) subranges

    for (my $i = 0; $i < @$a - 1; $i++, $b = $e)
    {
        $e     = $a->[$i + 1];          # end value for this subinterval
        $r[$i] = Hop($b, $e);           # regex for this subinterval
    }

    my $s = join("|", @r);              # form the disjunction

    $s = "(" . $s . ")" if @r > 1;      # parens as needed

return $s
}

#------------------------------------------------------------------------------

sub Hop($$)                             # simple regex for one subinterval
{
    my ($b, $e) = @_;                   # begin, end
    my $f = $e - 1;                     # final value that's actually in-range

    my @b = Dig($b);                    # break into ones, tens, hundreds
    my @f = Dig($f);

    my $r = "0";                        # result string

    for (my ($i, $t) = (1, 10); $i < @b; $i++, $t *= 10)
    {
        $b[$i] /= $t;                   # descale the digits to be digits
        $f[$i] /= $t;
    }

    if ($b[0] <= $f[0] && Equ([ @b[1..2] ], [ @f[1..2] ]))
    {
        $r  = Led($b[2]);               # lead digit
        $r .= Mid($b[2], $b[1]);        # middle digit
        $r .= Ran($b[0], $f[0]);        # digit range
    }
    elsif ($b[1] < $f[1] && Equ([ $b[2], $b[0], $f[0] ], [ $f[2], 0, 9 ]))
    {
        $r  = Led($b[2]);               # lead digit
        $r .= Ran($b[1], $f[1]);        # digit range
        $r .= "?" unless $b[2] || $b[1]; # maybe optional
        $r .= "[0-9]";                  # any digit
    }
    elsif ($b[2] < $f[2] && Equ([ @b[0..1], @f[0..1] ], [ 0, 0, 9, 9 ]))
    {
        $r  = Ran($b[2], $f[2]);        # digit range
        $r .= "?" unless $b[2];         # maybe optional
        $r .= "[0-9]";                  # any digit
        $r .= "?" unless $b[2];         # maybe optional
        $r .= "[0-9]";                  # any digit
    }

return $r                               # return the regex
}

#------------------------------------------------------------------------------

sub Dig($)                              # resolve a number into 3 digits
{
    my $n = shift;                      # the number

return 0 unless $n < 1000;              # size check (an assert, more or less)

    my @a = ($n % 10, $n % 100, $n);    # result array = ones, tens, hundreds

    for (my $i = 2; $i; $i--)           # some tens and ones still mixed in
    {
        $a[$i] -= $a[$i - 1];           # distill using next mixed up digit
    }

return @a                               # return the 3-digit array
}

#------------------------------------------------------------------------------

sub Equ(\$\$)                           # compare two lists for equality
{
    my ($a, $b) = @_;

return 0 unless @$a == @$b;             # lengths must match exactly

    for (my $i = 0; $i < @$a; $i++)
    {
return 0 if $a->[$i] != $b->[$i];       # all entries must match
    }

return 1                                # only get here when all matched
}

#------------------------------------------------------------------------------

sub Led($)                              # stringize lead digit, add ? if 0
{
    my $n = shift;                      # the lead digit

return $n ? "$n" : "0?";                # string result or optional lead 0
}

#------------------------------------------------------------------------------

sub Mid($$)                             # stringize middle digit, add ? if 00
{
    my ($h, $t) = @_;                   # hundreds and tens digits

return $h || $t ? "$t" : "0?"           # string result or optional middle 0
}

#------------------------------------------------------------------------------

sub Ran($$)                             # stringize single-digit range
{
    my ($b, $f) = @_;                   # beginning and final digit in range

return $b == $f     ? "$b" :            # single digit
       $b == $f - 1 ? "[$b$f]"          # two possible digits
                    : "[$b-$f]";        # at least 3 possible digits
}

#------------------------------------------------------------------------------

sub Opt()                               # process command line options
{
    GetOptions("help!", "debug!")
        or Err();

    $opt_help   = 0     unless defined $opt_help;
    $opt_debug  = 0     unless defined $opt_debug;

    disable diagnostics         unless $opt_debug;
    Use()                           if $opt_help || !(_at_)ARGV;

return @ARGV
}

#------------------------------------------------------------------------------

sub Err(;$$)                            # optional description and exit code
{
    my ($msg, $err) = @_;

    warn "\nError: $msg\n"  if $msg;

    my $pgm = $0;                       # program name
    $pgm =~ s|.*/||;                    # strip path if any

    warn "\nType '$pgm --help' for usage info  (or -h also works)\n\n";

exit($err || 1)                         # default to exit 1 (never 0)
}

#------------------------------------------------------------------------------

sub Use()                               # usage note
{
    print <DATA>;

exit 0
}

#------------------------------------------------------------------------------
exit 1                                  # should never get here
#------------------------------------------------------------------------------
__END__

Usage:      iprangex.pl [options] dotted-IP-range

Purpose:    Compose a regular expression to match a dotted decimal IP range for
            the four octets in an IP address.

Options:    --help      show this help info
            --debug     enable diagnostics

Discussion: A dotted-IP-range is four octet-ranges separated by dots.  An octet
            range is of the form "first-last", where "-last" is optional.  Must
            conform to 0 <= first <= last <= 255.  Prints out a regex that will
            match that IP range.  For example, the regex for octet range 54-123
            is 0?5[4-9]|0?[6-9][0-9]|1[01][0-9]|12[0-3].  The octet regexes are
            parenthesized if needed, dotted together, and then printed out.
--
v2.0b, 8/05/02, Mike Peeler <zconcept(_at_)netcom(_dot_)com>


_______________________________________________
procmail mailing list
procmail(_at_)lists(_dot_)RWTH-Aachen(_dot_)DE
http://MailMan.RWTH-Aachen.DE/mailman/listinfo/procmail