On Thu, Apr 22, 2004 at 01:11:21AM +0000, Mark wrote:
| > This seems like a great idea, and I'd like to start doing the same
| > filtering. I don't know much about broadband PTR's, and it would save
| > me a lot of learning time if you would be willing to share your regex
| > rules. Would you be willing to do this? I'm sure that others would
| > like to see your rules, as well.
|
| Checking my code, it turns out it is actually but a single regex. :) (the
| code around is just exempting rules). It will undoubtedly wrap in this
| email; so, unwrap it first before using it!
|
| /\b[a-z]*?(\d{1,3}[.-]\d{1,3}[.-]\d{1,3}|([a-z]?dsl(am)?|dhcp|tnt|ipt|pool|n
| as|cvx|leased|slip|subscriber|d[iu]p|modem(cable)?|ppp(oe)?|dyn(amic)?|dial(
| up)?|cust(omers?)?|(end)?users?|d?cliente?)\d*?)\b/i
Here's the relevant excerpt from my code. Patches welcome.
# deny at connect if the hostname (any of the following)
# 1) contains the IP address
# 2) contains half the IP address plus some other indicator
# 3) looks like a lots-of-parts-foo-123.blah-blah.net address
# 4) matches comcast notation
# 5) looks like a dialup client (e.g. t-dialin.net notation)
use warnings;
use strict;
# on_connect($filter)
#
# Connect-time heuristic that decides whether the peer's reverse-DNS name
# looks like a consumer broadband or dialup machine.
#
# Reads the peer address and hostname from $filter->{txn} (get_peeraddr /
# get_peerhost) and records exactly one verdict via $filter->set_opinion:
#   OC_DENY, "<reason>"  - the hostname matched one of the checks below
#   OC_DECLINE           - no hostname, an address that is not a dotted
#                          quad, an exempted domain, or no check matched
#
# Returns nothing; the verdict is communicated through set_opinion only.
#
# NOTE(review): DEBUG, OC_DENY and OC_DECLINE are not defined in this
# excerpt; they are presumably constants provided elsewhere in the file.
sub on_connect {
    my ($filter) = @_;

    my $addr = $filter->{txn}->get_peeraddr;
    my $host = $filter->{txn}->get_peerhost;

    if (not defined $host or not length $host) {
        # $filter->{txn}->dprint("looks_like_broadband: no host; bailing.");
        $filter->set_opinion(OC_DECLINE);
        return;
    }

    # Every check below interpolates the address bytes into regexes, so
    # insist on a dotted quad.  Anything else would leave bytes undefined,
    # and an empty pattern in Perl silently reuses the last successful one.
    my @bytes = defined $addr
        ? $addr =~ /\A(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})\z/
        : ();
    if (@bytes != 4) {
        $filter->set_opinion(OC_DECLINE);
        return;
    }
    my ($byte_1, $byte_2, $byte_3, $byte_4) = @bytes;

    # Hex renderings of the address as they show up in PTR names.  Each
    # byte is zero-padded to two digits so the result is always 8 hex
    # digits (e.g. 68.198.72.117 -> 44c64875); unpadded output would both
    # miss real names and match short digit runs by accident.
    my $hex_string    = sprintf "%02x%02x%02x%02x",
                                $byte_1, $byte_2, $byte_3, $byte_4;
    my $hex_backwards = sprintf "%02x%02x%02x%02x",
                                $byte_4, $byte_3, $byte_2, $byte_1;
    my $reversed_hex  = reverse $hex_string;    # character-wise reversal

    DEBUG and $filter->{txn}->dprint(
        "testing host=$host, bytes $byte_1 $byte_2 $byte_3 $byte_4");

    # 0) special cases
    # A17-250-248-85.apple.com looks like a consumer broadband machine
    #   (contains 17.250.248.85)
    # 207-171-168-37.amazon.com looks like a consumer broadband machine
    #   (contains 207.171.168.37)
    if ($host =~ m{ \. (?: apple\.com | amazon\.com ) $ }ix) {
        $filter->set_opinion(OC_DECLINE);
        return;
    }

    # 1) contains the IP address
    # some hostnames are just bizarre:
    #   d233-64-245-253.clv.wideopenwest.com[64.233.253.245]
    #   dhcp024-210-034-053.columbus.rr.com[24.210.34.53]
    my $has_all_bytes = $host =~ /$byte_1/
                     && $host =~ /$byte_2/
                     && $host =~ /$byte_3/
                     && $host =~ /$byte_4/;
    my $has_hex_addr  = $host =~ /$hex_string/i
                     || $host =~ /$hex_backwards/i
                     || $host =~ /$reversed_hex/i;
    if ($has_all_bytes or $has_hex_addr) {
        $filter->set_opinion(OC_DENY,
            "$host looks like a consumer broadband machine (contains $addr)");
        return;
    }

    # 2a) contains half the IP address plus some other indicator
    # 2b) or some really long hexadecimal string plus the indicator
    #   fia83-8.dsl.hccnet.nl[62.251.8.83]
    #   f88114.upc-f.chello.nl[80.56.88.114]
    #   CPE0004e2372711-CM000a73666706.cpe.net.cable.rogers.com
    #   h0040f43bf622.ne.client2.attbi.com[24.60.108.81]
    #   68.198.72.117(ool-44c64875.dyn.optonline.net)
    #   ip9135f361.speed.planet.nl[145.53.243.97]
    if ($host =~ /(docsis|cable|dsl|adsl|dhcp|cpe|client|optonline|^ip)/i
        and (   ($host =~ /$byte_3/ && $host =~ /$byte_4/)
             or $host =~ /[0-9a-f]{8,}/i)
    ) {
        $filter->set_opinion(OC_DENY,
            "$host looks like a consumer broadband machine");
        return;
    }

    # A single dot-separated label holding the last two bytes, in either
    # order, separated by non-digits.
    #   ip132-AS69.baltnet.ru[217.168.69.132]
    if (my ($match) = grep { /$byte_3\D+$byte_4/ || /$byte_4\D+$byte_3/ }
                      split /\./, $host
    ) {
        $filter->set_opinion(OC_DENY,
            "$host looks like a consumer broadband machine"
            . " ($match matches $addr)");
        return;
    }

    # Number of '-'/'.' separated pieces, shared by checks 3a and 3b.
    # (Assign to an array first: "() = split" would be limited to 1 field.)
    my @parts = split /[-.]/, $host;

    # 3a) looks like a lots-of-parts-foo-123.blah-blah.net address
    #   ca-morpark-cuda1-zone7-b-159.vnnyca.adelphia.net[67.23.129.159]
    if (    $host =~ /\b$byte_4\b/
        and length($host) > 40
        and @parts >= 8
        and $host =~ /\b(adelphia)\b/i
    ) {
        $filter->set_opinion(OC_DENY,
            "$host looks like a consumer broadband machine"
            . " (has IP byte in very long hostname)");
        return;
    }

    # peter(_at_)ixp(_dot_)jp writes:
    #
    # For OCN and the likes here:
    #
    #   p3228-ipad63marunouchi.tokyo.ocn.ne.jp
    #   p2046-ipbf406marunouchi.tokyo.ocn.ne.jp
    #         ^^^^
    #   ip adsl
    #   ip bflets (my home has FTTH)
    #
    # 3b) p2046-ipbf406marunouchi.tokyo.ocn.ne.jp
    if (    $host =~ /\.ne\.jp$/i
        and length($host) > 30
        and @parts >= 5
        and $host =~ /$byte_4-(ipad|ipbf)\d/i
    ) {
        $filter->set_opinion(OC_DENY,
            "$host looks like a consumer broadband machine"
            . " (has IP byte in very long hostname)");
        return;
    }

    # vectant, basically everything is resold flets service
    #
    # typically they would be in .ne.jp
    #
    # 3c) d162.HtokyoFL6.vectant.ne.jp
    if (    $host =~ /^\D{1,2}\d+$byte_4/
        and $host =~ /\.vectant\.ne\.jp$/i
    ) {
        $filter->set_opinion(OC_DENY,
            "$host looks like a consumer broadband machine"
            . " (has vectant hostname with $byte_4)");
        # Stop here like every other deny branch; without this the sub
        # falls through to the final set_opinion(OC_DECLINE) below.
        return;
    }

    # 4) matches comcast notation
    #   pcp115957pcs.clmntn01.nj.comcast.net
    #   pcp01332753pcs.columb01.pa.comcast.net
    #   bgp542174bgs.ewndsr01.nj.comcast.net[68.38.144.91]
    if ($host =~ /^(bgp|pcp)\d+(pcs|bgs)\.\w+\.\w+\.comcast\.net$/i) {
        $filter->set_opinion(OC_DENY,
            "$host looks like a comcast broadband client");
        return;
    }

    # 5) looks like a dialup client
    #   pD9528134.dip.t-dialin.net[217.82.129.52]
    if ($host =~ /\d{5,}.*(dial(in|up)|modem)/i) {
        $filter->set_opinion(OC_DENY, "$host looks like a dialup client");
        return;
    }

    # Nothing matched: express no opinion about this peer.
    $filter->set_opinion(OC_DECLINE);
    return;
}