I have the impression there is a a very small but nonzero demand for a
tool to aid in composing a regex to match a block of IP addresses. It
was a fun little exercise. I hope it will help someone, either to use
directly or in understanding the nuances in writing their own regexes.
Here's an example of its use:
% iprangex.pl 166.84.1-127.0-255
166\.
0?84\.
(0?0?[1-9]|0?[1-9][0-9]|1[01][0-9]|12[0-7])\.
([01]?[0-9]?[0-9]|2[0-4][0-9]|25[0-5])
The line-breaks are only for the purposes of this message, and are not
a normal feature of the output. Looking at the last subexpression, it
is clear that a rigorous RE for even as simple a range as 0-255 is not
as pristine as one might have expected. That said, play with it, have
some fun. Comments and suggestions welcome, script is included below.
Mike
--
-- Con In Hanc marginis
-- Tact Formation exiguitas
-- non caperet.
#!/usr/local/bin/perl -w
#------------------------------------------------------------------------------
# iprangex.pl
# Compose a regular expression to match a dotted decimal IP range
# for the four octets in an IP address.
#------------------------------------------------------------------------------
BEGIN
{
require 5.005
}
use integer; # no floats here
use strict; # no fudges here (yeah, right)
use diagnostics; # verbose runtime errors
# English; # enable $ARG for $_
use Getopt::Long; # try not to reinvent the wheel
use vars qw($opt_help $opt_debug); # globals
my @args = Opt(); # command line options
#------------------------------------------------------------------------------
for (@args) # command line arguments
{
my @ip = split /\./, shift; # split each spec into 4 ranges
my @re = ();
@ip == 4 or Err("Not four octet ranges");
print "\n" if $opt_debug;
for (@ip)
{
my @r = split /-/; # split each range into (begin, end)
@r == 1 and $r[1] = $r[0]; # convert a singleton to a range
@r == 2 or Err("Bad syntax in octet range");
push @re, Gee($r[0], $r[1]); # process one range
}
print "\n" if $opt_debug;
print join("\\.", @re), "\n";
}
#------------------------------------------------------------------------------
exit 0; # end of main
#------------------------------------------------------------------------------
sub MAX() { 256 } # for IP addresses (at most 3 digits)
#------------------------------------------------------------------------------
sub Gee($$) # process one pre-parsed octet range
{
my ($first, $final) = @_; # inclusive range spec
0 <= $first && $first <= $final && $final < MAX
or Err("Invalid range specified"); # bounce check
my @range = ($first, $final + 1); # we make intervals obey L <= x < R
my $point = 0;
splice(@range, -1, 0, $point) while $point = Cut(\(_at_)range);
my $regex = Run(\(_at_)range); # spit out the corresponding string
return $regex unless $opt_debug; # end of non-debug code
$range[(_at_)range] = MAX + 1; # dummy value for the final
pseudo-cut
for (my $i = 0; $i < @range - 2; $i++) # validate all cuts (triples)
{
my $b = $range[$i + 0]; # points must be distinct and in order
my $c = $range[$i + 1]; # and only 1-digit transitions allowed
my $e = $range[$i + 2];
Let($b, $c, $e) or warn "\n\nBOGOSITY DETECTED! ($b-$c-$e)\n\n";
}
return $regex
}
#------------------------------------------------------------------------------
sub Cut(\$) # break range into 1-digit transitions
{ # only splits last interval in range
my $a = shift; # (effectively a recursive algorithm)
return 0 unless @$a >= 2; # size check (an assert, more or less)
my $b = $a->[-2]; # begin value of the last interval
my $e = $a->[-1]; # end value of that interval
my $f = $e - 1; # final value that's actually in-range
Let($b, $e, MAX + 1) and return 0; # maybe nothing to do
my @b = Dig($b); # break into ones, tens, hundreds
my @f = Dig($f);
my $t = $b - $b[0] + 10; # push left end up to multiple of 10
my $h = $b - $b[1] + 100; # and then up to a multiple of 100
$b[0] && Let($b, $t, $e) and return $t; # ones normalized
$b[1] && Let($b, $h, $e) and return $h; # tens normalized
$h = $f[2]; # lop off excess hundreds
$t = $f - $f[0]; # lop off excess tens
$f[2] > $b[2] && Let($b, $h, $e) and return $h; # centuries skipped
$f[1] > $b[1] && Let($b, $t, $e) and return $t; # decades skipped
return 0 # only excess ones left now, we hope
}
#------------------------------------------------------------------------------
sub Let($$$) # check that a cut passes muster
{
my ($b, $c, $e) = @_; # begin, cut (the candidate), end
my $f = $c - 1; # 0 to 100 means values 0 to 99
return 0 unless $b < $c && $c < $e; # must be distinct and in order
my @b = Dig($b); # break into ones, tens, hundreds
my @f = Dig($f);
# Require b to f be only a 1-digit transition. One digit can change,
# higher digits must not, and lower digits must be [0-9].
return $b[0] <= $f[0] && Equ([ @b[1..2] ], [ @f[1..2] ])
|| $b[1] < $f[1] && Equ([ $b[2], $b[0], $f[0] ], [ $f[2], 0, 9 ])
|| $b[2] < $f[2] && Equ([ @b[0..1], @f[0..1] ], [ 0, 0, 9, 90 ])
} # note, tens are 2 digits, hence 90
#------------------------------------------------------------------------------
sub Run(\$) # compose a regex for the whole range
{
my $a = shift; # the range, broken into subintervals
return 0 unless @$a >= 2; # size check (an assert, more or less)
print join(", ", @$a), "\n" if $opt_debug;
my $b = $a->[0]; # begin, end pair for each subinterval
my $e = MAX + 1;
my @r = (); # result strings
$#r = @$a - 2; # preallocate the (up to 5) subranges
for (my $i = 0; $i < @$a - 1; $i++, $b = $e)
{
$e = $a->[$i + 1]; # end value for this subinterval
$r[$i] = Hop($b, $e); # regex for this subinterval
}
my $s = join("|", @r); # form the disjunction
$s = "(" . $s . ")" if @r > 1; # parens as needed
return $s
}
#------------------------------------------------------------------------------
sub Hop($$) # simple regex for one subinterval
{
my ($b, $e) = @_; # begin, end
my $f = $e - 1; # final value that's actually in-range
my @b = Dig($b); # break into ones, tens, hundreds
my @f = Dig($f);
my $r = "0"; # result string
for (my ($i, $t) = (1, 10); $i < @b; $i++, $t *= 10)
{
$b[$i] /= $t; # descale the digits to be digits
$f[$i] /= $t;
}
if ($b[0] <= $f[0] && Equ([ @b[1..2] ], [ @f[1..2] ]))
{
$r = Led($b[2]); # lead digit
$r .= Mid($b[2], $b[1]); # middle digit
$r .= Ran($b[0], $f[0]); # digit range
}
elsif ($b[1] < $f[1] && Equ([ $b[2], $b[0], $f[0] ], [ $f[2], 0, 9 ]))
{
$r = Led($b[2]); # lead digit
$r .= Ran($b[1], $f[1]); # digit range
$r .= "?" unless $b[2] || $b[1]; # maybe optional
$r .= "[0-9]"; # any digit
}
elsif ($b[2] < $f[2] && Equ([ @b[0..1], @f[0..1] ], [ 0, 0, 9, 9 ]))
{
$r = Ran($b[2], $f[2]); # digit range
$r .= "?" unless $b[2]; # maybe optional
$r .= "[0-9]"; # any digit
$r .= "?" unless $b[2]; # maybe optional
$r .= "[0-9]"; # any digit
}
return $r # return the regex
}
#------------------------------------------------------------------------------
sub Dig($) # resolve a number into 3 digits
{
my $n = shift; # the number
return 0 unless $n < 1000; # size check (an assert, more or less)
my @a = ($n % 10, $n % 100, $n); # result array = ones, tens, hundreds
for (my $i = 2; $i; $i--) # some tens and ones still mixed in
{
$a[$i] -= $a[$i - 1]; # distill using next mixed up digit
}
return @a # return the 3-digit array
}
#------------------------------------------------------------------------------
sub Equ(\$\$) # compare two lists for equality
{
my ($a, $b) = @_;
return 0 unless @$a == @$b; # lengths must match exactly
for (my $i = 0; $i < @$a; $i++)
{
return 0 if $a->[$i] != $b->[$i]; # all entries must match
}
return 1 # only get here when all matched
}
#------------------------------------------------------------------------------
sub Led($) # stringize lead digit, add ? if 0
{
my $n = shift; # the lead digit
return $n ? "$n" : "0?"; # string result or optional lead 0
}
#------------------------------------------------------------------------------
sub Mid($$) # stringize middle digit, add ? if 00
{
my ($h, $t) = @_; # hundreds and tens digits
return $h || $t ? "$t" : "0?" # string result or optional middle 0
}
#------------------------------------------------------------------------------
sub Ran($$) # stringize single-digit range
{
my ($b, $f) = @_; # beginning and final digit in range
return $b == $f ? "$b" : # single digit
$b == $f - 1 ? "[$b$f]" # two possible digits
: "[$b-$f]"; # at least 3 possible digits
}
#------------------------------------------------------------------------------
sub Opt() # process command line options
{
GetOptions("help!", "debug!")
or Err();
$opt_help = 0 unless defined $opt_help;
$opt_debug = 0 unless defined $opt_debug;
disable diagnostics unless $opt_debug;
Use() if $opt_help || !(_at_)ARGV;
return @ARGV
}
#------------------------------------------------------------------------------
sub Err(;$$) # optional description and exit code
{
my ($msg, $err) = @_;
warn "\nError: $msg\n" if $msg;
my $pgm = $0; # program name
$pgm =~ s|.*/||; # strip path if any
warn "\nType '$pgm --help' for usage info (or -h also works)\n\n";
exit($err || 1) # default to exit 1 (never 0)
}
#------------------------------------------------------------------------------
sub Use() # usage note
{
print <DATA>;
exit 0
}
#------------------------------------------------------------------------------
exit 1 # should never get here
#------------------------------------------------------------------------------
__END__
Usage: iprangex.pl [options] dotted-IP-range
Purpose: Compose a regular expression to match a dotted decimal IP range for
the four octets in an IP address.
Options: --help show this help info
--debug enable diagnostics
Discussion: A dotted-IP-range is four octet-ranges separated by dots. An octet
range is of the form "first-last", where "-last" is optional. Must
conform to 0 <= first <= last <= 255. Prints out a regex that will
match that IP range. For example, the regex for octet range 54-123
is 0?5[4-9]|0?[6-9][0-9]|1[01][0-9]|12[0-3]. The octet regexes are
parenthesized if needed, dotted together, and then printed out.
--
v2.0b, 8/05/02, Mike Peeler <zconcept(_at_)netcom(_dot_)com>
_______________________________________________
procmail mailing list
procmail(_at_)lists(_dot_)RWTH-Aachen(_dot_)DE
http://MailMan.RWTH-Aachen.DE/mailman/listinfo/procmail