#!/usr/bin/perl
use strict;
# File-scoped state shared by the main program and the subs further down.
my $country;            # key into %blockedPorts ('UAE', 'KSA' or 'free')
my $useProxy;           # proxy used to fetch the proxy list itself
my $proxyList;          # URL of the list of candidate proxies
my @countURL;           # (start, finish) range substituted for %COUNT%
my $maxTime;            # time limit (seconds) for each proxy test
my $quiet;              # true: no list dump, no beeps
my %blockedPorts;       # country => '#'-delimited string of blocked ports
my $pauseOnCompletion;  # true: wait for <enter> before exiting
my $dnsInstalled;       # true when Net::DNS could be loaded
my $ignorePorts;        # textual port specification, e.g. '[23, 12000]'
my @ignorePorts;        # the ports obtained from $ignorePorts
my $port;               # loop variable used when building the port list
my %options;            # command line switches, filled in by GetOptions
my $progname;           # basename of this script, without extension
my $VERSION;            # "major.minor", taken from the RCS Revision keyword
my $useConnect;         # true: test the proxies with CONNECT
my $useConnectProxy;    # proxy through which the CONNECT requests are sent
my $nameServer;         # name server handed to Net::DNS
my $commandLine;        # the invocation, echoed in the final report
use Getopt::Long;

## The values assigned below are the built-in defaults; they are set up 
#	for running from the UAE. Edit them (or comment them out with '#') 
#	to suit another location.

# script name: drop any directory part, then any extension
($progname = $0) =~ s{.*[/\\]}{};
$progname =~ s{\.\w*$}{};

# turn the RCS keyword into a "1.62"-style version string
$VERSION = do {
	my ($major, $minor) = q$Revision: 1.62 $ =~ /(\d+)\.(\d+)/;
	sprintf("%d.%02d", $major, $minor);
};

# remember the invocation now, because GetOptions() will consume @ARGV
$commandLine = "$progname " . join(' ', @ARGV);

# Ports known to be blocked, per country. Modify this if you are sure 
# (and please let the author know of any confirmed changes to this list).
%blockedPorts = (
	'UAE'  =>
	'#80#81#119#880#1080#3128#8001#8002#8003#8080#8081#8088#8888#',
	'KSA'  => '#80#81#119#8000#8080#',
	'free' => '',
);

# Option specifications handed to Getopt::Long. A trailing '=s' means the
# switch takes a (string) value; the others are plain flags.
my @getopt_args = (
	'f=s',	# read in a configuration file
	'c=s',	# country to do the tests for
	't=s',	# timeout
	'i=s',	# ignore port array
	'h',	# print usage
	'v',	# print version
	'q',	# no extra info, beeps etc.
	'p=s',	# URL for the proxy to use to get the proxy list, or 'none'
	'C',	# use CONNECT to test proxies
	'P=s',	# use CONNECT via this proxy
	'w',	# on completion, wait for a GUI user to close the window
	'n=s',	# count array [start,finish]
	'x',	# (undocumented) switch on LWP debugging; this flag is tested 
			# below but was missing from the list, so it could never be set
);

# Switches are case sensitive (-c/-C, -p/-P) and may be bundled.
# Configure() is the current name of the deprecated config().
Getopt::Long::Configure("no_ignore_case", "bundling");
unless (GetOptions(\%options, @getopt_args)) {
	usage();
}

# -v: report the version and stop (die => message goes to STDERR)
if ($options{'v'}) {
#    my $DISTNAME = 'findProxy ' . $VERSION;
	my $DISTNAME = 'findProxy.zip';
	die <<"EOT";
This is findProxy $VERSION ($DISTNAME)

Author: wayne\@nym.alias.net

This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
EOT
}

# We need either a list URL on the command line or a config file that 
# may supply one; otherwise (or on -h) show the help text.
usage() if $options{'h'} || !(@ARGV or $options{'f'});

LWP::Debug::level('+') if $options{'x'};

#set parameters. command line settings always override, config 
#file params next, and coded defaults only if nothing else defines it
setConfigFileParams();	#options from config file
setCodeDefaultParams();	#resolve the remaining params by coded defaults

############### don't modify anything below here  ###############
############### unless you know what you're doing ###############

# LWP (libwww-perl) is mandatory: without it nothing can be fetched.
# (Having LWP also means HTML::Parser is installed.)
unless (eval { require LWP; 1 })	{
	die "can't continue because libwww-perl (LWP) package is not ".
	"installed";
	}

# Net::DNS is optional: it is only used to add names/addresses to the 
# final report. $dnsInstalled records whether it could be loaded.
$dnsInstalled = eval { require Net::DNS; 1 };
unless ($dnsInstalled)	{
	print "Not using DNS name resolution because the Net::DNS module\n";
	print "is not installed\n";
	print "If you use ActiveState Perl, try \n";
	print "ppm install Net-DNS\n";
	print "in a command window\n";
	}

# Build the list of ports that will not be tested: the ports known to be 
# blocked for $country plus the ports the user asked to ignore. The list 
# is a string of the form '#80#81#...#' and is searched later with 
# /#$port#/, so every port must have a '#' on BOTH sides. The entry for 
# 'free' is empty (and an unknown country has no entry at all), so make 
# sure the string starts and ends with '#' before appending; otherwise 
# the first ignored port could never match.
my $knownBlockedPorts=$blockedPorts{$country};
$knownBlockedPorts='' unless (defined($knownBlockedPorts));
$knownBlockedPorts="#$knownBlockedPorts" unless ($knownBlockedPorts=~/^#/);
$knownBlockedPorts.='#' unless ($knownBlockedPorts=~/#$/);
foreach $port (@ignorePorts)	{$knownBlockedPorts.="$port#"};
unless ($quiet)	{print "ignoring proxies on ports: $knownBlockedPorts\n"};

# Sanitize the proxyList URL, in case people cut and paste a Windows 
# path: turn EVERY backslash into a forward slash (without /g only the 
# first one was converted).
$proxyList =~ s|\\|/|g;
my $origProxyList=$proxyList; #save this for later

# 'Counting' URLs: @countURL holds (start, finish) and the URL must 
# contain the %COUNT% placeholder, which is replaced by the start value 
# here and by the following values at the end of each pass of the main 
# loop. Without a range the loop runs exactly once (0..0).
if (@countURL)	{
	die "bad range in \$countURL" if ($countURL[0]>$countURL[1]);
	($proxyList=~/%COUNT%/) or die "%COUNT% for the ". 
	"URL counter must be in the URL you specify as \$proxyList";
	$proxyList=~s/%COUNT%/$countURL[0]/;
	}
else	{@countURL=(0,0)};

my $working='';	# '#'-separated records "host\tport\ttime" of good proxies
my %tested=();	# "host:port" => 1 for everything already looked at

# User agent that downloads the proxy list(s); the proxies themselves are 
# tested later with a separate agent per proxy.
use LWP::UserAgent;
use LWP::Debug qw(level); #level('+');
my $ua = LWP::UserAgent->new;
$ua->agent("AgentName/0.1 " . $ua->agent);
# A browser-like alternative, should it ever be needed:
#$ua->agent("Mozilla/4.0 (compatible; MSIE.5.5; Windows NT 5.0) " . 
#	$ua->agent);

# Always set the http proxy explicitly - to the chosen one, or to the 
# empty string so that no environment setting is picked up.
$ua->proxy(['http'], ($useProxy ? $useProxy : ''));

my ($nrTested, $nrWorking) = (0, 0);	# proxies tried / proxies accepted
my $URLCounter = $countURL[0];	# the counter starts at the low end
select(STDOUT);
$| = 1;	# unbuffered, so progress is visible while the tests run
# Main loop: one pass per value of the URL counter (a single pass when no 
# counting range was given, because @countURL is then (0,0)). Each pass 
# downloads $proxyList, strips the page down to "host port" lines with 
# the substitutions below, and tests every host:port pair not seen before.
while ($URLCounter <= $countURL[1])	{
	# Create a request for the list 
	my $req = new HTTP::Request GET => $proxyList;
	print "getting $proxyList\n";
	# Pass request to the user agent and get a response back
	my $res;
	$res = $ua->request($req) or die "can't get $proxyList";
	# Check the response outcome
	if ($res->is_success) {
		my $list=$res->content."\n";	#make sure there's a new 
						#line for the regex
		unless ($quiet) {print "$list\n"};

		# This stuff is gross. Note the order is important. 
		# One day I'll do it right, with HTML::Parser
		# At the moment, they're not exactly robust - a small 
		# change to the format of any of these pages  might 
		# screw the regexes up.
		
		#for http://proxycheck.spylog.ru/
		$list =~ s/<td><font size=-1>\&nbsp;/ /ig;
		#for proxys4all message board
		$list =~ s/\n(\: )+/\n/ig;
		#for http://www8.big.or.jp/~000/CyberSyndrome/ 
		$list =~ s/<li><a href="JavaScript:OpenWindow\('//ig;
		#for http://allfreeweb.hypermart.net/cgi-bin/dbase/db.cgi
		$list =~ s/<TD bgcolor="#999999">//ig;
		$list =~ s/<font face="'Trebuchet MS',Arial,Verdana,sans-serif"//ig;
		$list =~ s/size="2"><b>//ig; 
		#for http://www.hackzone.ru/member/nethack/proxies.htm
		$list =~ s/\r\n        tppabs="http.*?\r\n/\r\n/ig; 
		#for http://www.hackzone.ru/member/nethack/proxies.htm
		$list =~ s/<td bgcolor="#0000\w\w">/ /ig; 
		#for some of MVlads lists
		$list =~ 
			s/(<li>)?<a href="(cgi-bin\/)?fp.pl\?hosts=[^\&]*\&ports=\d+">//ig; 
		$list =~ 
			s|22</a> <a href="shdb.pl\?key=||ig;
		#for http://www.uic.nnov.ru/~visy/internet/eng/sponsor/proxy.htm
		$list =~ s/<\/TD>\r\n\r\n<TD>\r\n<CENTER>/ /ig;	
		$list =~ s/\d+ stat //ig;
		#for http://www.hackzone.ru/member/nethack/proxies.htm
		$list =~ s/<\/td>[\r\n]+        //ig;	
		#for http://ssdd.virtualave.net/all00.html
		$list =~ s/<\/td>[\r]?\n\s+<td>\&nbsp<\/td>[\r]?\n\s+<td>/ /ig; 
		#for http://www.cl.spb.ru/sparta/list1.htm
		$list =~ s/<\/p>\r\n    <p>/\r\n/ig;	
		#for general html lists
		$list =~ s/<LI>//ig; $list =~ s/<P ALIGN="left">//ig;
		#for general html tables
		$list =~ s/<TR>//ig;  			
		$list =~ s/<[\/]?TD>/ /ig; $list =~ s/<TD ALIGN=RIGHT>//ig;
		#for my posted proxy.pac file format
		$list =~ s/proxies\[proxyNr\+\+\]//ig;	
		#for cyberarmy format
		$list =~ s/port//ig; $list =~ s/<!--\d\d\d\d--><b>//ig;	
		#for proxy.pac again, but maybe elsewhere too
		$list =~ s/["=]//ig; $list =~ s|\n//|\n|ig;		
		#for http://proxy.nikto.net/all_list.htm
		$list =~ s/<br>//ig; $list =~ s/<[\/]?b>/ /ig;	
		#for some posted lists
		$list =~ s/> //ig;
		#statProxy format
		$list =~ s/^proxy //ig;
		print "Starting the tests ... please wait\n";
		# Walk over the cleaned-up text line by line. A candidate line 
		# starts (after optional white space) with a token that contains 
		# a dot and no white space or colon - the host - followed by 
		# white space and/or colons, an optional sign and a run of 
		# digits - the port. The rest of the line is ignored.
		while 
		($list =~ m|^\s*([^\s:\(]+\.[^\s:]+)[\s:]+[+-]?(\d+).*[\n\r]+|mg)
			{
			my ($host, $port) = ($1, $2);
			next unless (($host) and ($port));
			next if ($host =~ /^bess-/);	#:-)
			# skip impossible port numbers and hosts containing anything 
			# but letters, digits, dots and hyphens
			next if (($port<=0) or ($port>65535) or 
				($host !~ /^[a-zA-Z0-9\.\-]+$/));
			# every host:port pair is looked at only once per run
			next if ($tested{"$host:$port"});
			# only test proxies whose port is not in the '#'-delimited 
			# list of blocked/ignored ports
			if (($host) and ($knownBlockedPorts !~ /#$port#/)) {
				print "testing: $host:$port"; $nrTested++;
				my $proxyUa = new LWP::UserAgent;
				# don't wait for longer than $maxTime for a 
				# page - if it doesn't arrive within that 
				# time, we're not interested anyway.
				$proxyUa->timeout($maxTime);
				
				# here's an agent string; use one day, if we think 
				#	we're being discriminated against :-)
				# User-Agent: Mozilla/4.0 (compatible; MSIE.5.5; Windows NT 5.0)

				# need to do the CONNECT here, if $useConnect, and 
				# change the proxy for the agent below. 
				# The only way I could get this to work was to 
				# replace the http.pm protocol in 
				# C:\Perl\site\lib\LWP\Protocol\http.pm. 
				# The object-oriented stuff either doesn't have 
				# the support, or my understanding of how it should 
				# be done is inadequate. I'd appreciate some advice 
				# here - I hate this.
				
				my $proxyReq;
				if ($useConnect)	{
					# CONNECT mode: the request goes to $useConnectProxy 
					# and names the candidate proxy as its target
					$proxyUa->proxy(['http'], $useConnectProxy);
					$proxyReq = new HTTP::Request CONNECT => 
						"http://$host:$port/";
					print " via CONNECT ($useConnectProxy) ";
					}
				else	{
					# GET mode: fetch the reference page through the 
					# candidate proxy itself
					$proxyUa->proxy(['http'], "http://$host:$port/");
					$proxyReq = new HTTP::Request GET => 
						'http://www.sex.com/';
					print " via GET ";
					};
				# time() has one-second resolution, so $myTime is the 
				# elapsed time in whole seconds
				my $startTime=time;
				my $proxyRes=$proxyUa->request($proxyReq);
				my $myTime=time-$startTime;
				# A proxy counts as working when the request succeeded, 
				# the body contains the keyword expected on the reference 
				# page, and the answer arrived in less than $maxTime.
				if (($proxyRes->is_success) and 
					($proxyRes->content=~
					 /fucking/) and 
					 ($myTime<$maxTime))	{
					unless ($quiet) {print "\7 \t***** working ******"}
					print "\n";
					# record format: host TAB port TAB seconds, records 
					# separated by '#'; taken apart again for the report
					$working .= "$host\t$port\t$myTime#";
					$nrWorking++}
				else {print "\n"};
				undef $proxyUa;
				}
			$tested{"$host:$port"}=1;
			};
		}
	else	{
		print "can't get $proxyList\n";
		# a failed download is fatal for a single URL; with a counting 
		# URL we simply move on to the next number
		exit unless ($origProxyList =~ /%COUNT%/);
		};
	# next URL: put the new counter value into a fresh copy of the 
	# original (placeholder-carrying) URL
	$URLCounter++;
	($proxyList = $origProxyList) =~ s/%COUNT%/$URLCounter/;
	};

# Final report. The working proxies were collected in $working as 
# '#'-separated records "host\tport\tseconds"; they are (optionally) 
# looked up in the DNS, de-duplicated through %hosts and printed sorted 
# by host.
my %hosts=();	#for dupe checking and sorting; "host:port" => "time:fqdn"

# Take the date from ONE localtime() call so that year, month and day 
# always belong to the same instant.
my @now=localtime(time);
print "\n*********************\n";
print "findProxy v$VERSION report (", ($now[5]+1900), 
	"-", sprintf("%02d", $now[4]+1), "-", 
	sprintf("%02d", $now[3]), ")\n";
print "Using: $commandLine\n\n";
if ($nrTested==0) {
	print "No unblocked proxies to test at this URL\n";
	print "using proxy: ", $useProxy ?"$useProxy" :"none", ".\n";
	print "Maybe there are none there or maybe the format is one\n";
	print "I don't understand - tell wayne and he will try to fix me.\n";
	}
elsif ($nrWorking==0)	{
	print "No usable proxies were found\n";
	}
else	{
	# (this statement used to end in a comma, which made the following 
	# 'my' list an argument of print)
	print "Working proxies found:\n";
	my ($host, $port, $myTime, $hostFqdn);

	# one resolver serves all lookups; it is only created when Net::DNS 
	# could be loaded
	my $resolver;
	if ($dnsInstalled)	{
		$resolver = Net::DNS::Resolver->new;
		$resolver->nameservers($nameServer); #can't trust the default one
		};

	foreach my $line (split(/#/, $working))	{
		($host, $port, $myTime)=split(/\t/, $line);
		$hostFqdn='';
		if ($resolver)	{
			# keep the name we asked for: $host is overwritten with the 
			# address below, and with several A records the name column 
			# must not end up holding an earlier address
			my $queriedName=$host;
			my $query = $resolver->search($host);
			if ($query) {
				foreach my $rr ($query->answer) {
					my $type=$rr->type;
					if ($type eq "A")	{
						# we had a name: report the address as host and 
						# the name as fqdn
						$hostFqdn=$queriedName;
						$host=$rr->address;
						}
					elsif ($type eq "PTR")	{
						# we had an address: collect all its names
						$hostFqdn.=$rr->ptrdname.", ";
						};
					}
				}
			else {
				if ($host=~/^\d+\.\d+\.\d+\.\d+$/) {
					$hostFqdn='none';	#we have an IP address, but can't 
										#print a fqdn. No problems.
					}
				else {
					$host='';	#we have an fqdn which does not resolve 
								#to an IP address. This one will simply 
								#be removed from the output. No warning needed.
					};
				};
			};
		if ($host) {
			# drop the ", " left after the last PTR name
			(my $fqdnList=$hostFqdn) =~ s/, $//;
			$hosts{"$host:$port"} = $myTime.":".$fqdnList;
			};
		};
	#now sort by IP address (alphabetically) and print
	printf "%-16s %-6s%-5s%-50s\n", 'host', 'port', 'time', 'FQDN';
	foreach my $hp (sort keys %hosts)	{
		($host, $port)=split(/:/, $hp); 
		($myTime, $hostFqdn)=split(/:/, $hosts{$hp});
		printf "%-16s:%-6u%-5u%-30s\n", $host, $port, $myTime, $hostFqdn;
		};
	};
	
print "\7\7"; # two beeps to finish :-)
#wait for the MS crowd ...
if ($pauseOnCompletion) {print "press enter to exit "; <STDIN>};	
1;

# setConfigFileParams()
#
# Reads the configuration file named by the -f option (does nothing when 
# -f was not given). Every line has the form
#     <perl variable>=><perl expression>     e.g.  $maxTime=>30
# Lines starting with '#' and blank lines are skipped. A variable that 
# already has a defined value is left alone, so settings made earlier win.
#
# SECURITY NOTE: both sides of '=>' are run through string eval, i.e. the 
# configuration file is executed as Perl code. Only use configuration 
# files you trust.
#
# Dies when the file cannot be opened; warns (and carries on) about lines 
# that cannot be understood or evaluated.
sub setConfigFileParams {
	return unless ($options{'f'}); #no configuration file was specified

	# three-argument open with a lexical handle: the file name can never 
	# be mistaken for an open mode, and the handle is private to this sub
	open(my $configFh, '<', $options{'f'}) 
		or die "can't open the config file '$options{'f'}': $!";
	my @config=<$configFh>;
	close($configFh);

	foreach my $entry (@config) {
		chomp($entry);
		next if ($entry =~ /^#/);		#comment
		next unless ($entry =~ /\S/);	#blank line
		# split on the FIRST '=>' only, so that the value itself may 
		# contain '=>' (a hash list, for example)
		my ($var,$val) = split(/=>/, $entry, 2);
		unless (defined($var) and defined($val) and $var =~ /\S/) {
			warn "ignoring malformed config line: $entry\n";
			next;
			};
		next if (eval("defined(".$var.")")); #make sure the command line options 
							#override any config file settings
		eval("$var=".$val); #set the variables to config file values
		warn "can't apply config line '$entry': $@" if ($@);
		};
	return;
	}
	
# setCodeDefaultParams()
#
# Gives every setting its final value. The order of preference is: 
# command line (%options, and the first remaining word of @ARGV for the 
# list URL), then whatever the configuration file has already put into 
# the variable, then the default coded here.
#
# Takes no arguments and returns nothing useful; it works on the 
# file-scoped settings and consumes one element of @ARGV. Dies when the 
# -n value is not of the form [start,finish].
sub setCodeDefaultParams {
	#proxy list url from the command line, or from config file, or my 
	#page by default. Note the explicit @ARGV: inside a sub a bare 
	#'shift' works on @_, so the URL on the command line was never seen.
	$proxyList=(shift(@ARGV) or $proxyList or 
	 'http://www.angelfire.com/wy/1waynes/checkedProxies.html');

	#country is UAE if unspecified; the spelling is normalised whatever 
	#its case, because %blockedPorts is keyed by 'UAE' and 'KSA'
	$country = ($options{'c'} or $country or 'UAE');
	if ($country=~/uae/i) {$country='UAE'};
	if ($country=~/ksa/i) {$country='KSA'};

	#there is a problem with finding some users' name servers, so ...
	unless ($nameServer) {
		if ($country=~/UAE/) {$nameServer='194.170.1.99'} #ns3.emirates.net.ae
		#else {$nameServer='198.6.1.82'}; #alter.net-1
		#else {$nameServer='198.7.0.1'}; #access.net (panix)
		elsif ($country=~/KSA/) {$nameServer='212.26.18.3'} #ns1.isu.net.sa
		else {$nameServer='198.7.0.1'}; #alter.net-2
		};

	#no proxy required for local files; sometimes a special one is needed 
	# when the site is blocked. Choose your local ISPs proxy when you can.

	# assume all users will be using some proxy to get the list to check
	# if this is not the case, they probably know they need to set 'none'
	# on the command line ... maybe :-)
	# (-C on the command line, else a value from the config file, else off)
	$useConnect=($options{'C'} or $useConnect or 0);

	#and we *must* have a proxy to use, default to a known CONNECT 
	#proxy, rather than to the one used to get the list (usually, the 
	#CONNECT is a local ISP proxy, so it may have the proxy list blocked.
	if ($useConnect) {$useConnectProxy=($options{'P'} or $useConnectProxy 
		or $options{'p'} or 'http://194.170.168.236:8080/')};

	#but if no list proxy is specified, it makes sense to use 
	#the CONNECT proxy if specified to get the list
	$useProxy= ($options{'p'} or $useProxy or $options{'P'} or 
		'http://proxy1.emirates.net.ae:8080/'); 

	if ($proxyList=~m|file://|i) {$useProxy=0}; #no proxy for file:// urls

	#and make sure we don't accidentally use an environment 
	#proxy, if 'none' is specified
	if ($useProxy =~ /^none$/) {$useProxy=0};
	if (defined($useConnectProxy) and $useConnectProxy =~ /^none$/) {
		$useConnectProxy=0};

	$maxTime = ($options{'t'} or $maxTime or 60);
	$quiet = ($options{'q'} or $quiet or 0);
	$pauseOnCompletion = ($options{'w'} or $pauseOnCompletion);
	if (!$pauseOnCompletion) {
		#double-clicked scripts on MS Windows would close their window
		$pauseOnCompletion = ($^O=~/MSWin32/ ?1 :0)};

	# NOTE: the port specification and the -n range below are run through 
	# string eval, i.e. they are executed as Perl code. That is acceptable 
	# for values typed by the person running the script, but do not feed 
	# this untrusted input.
	$ignorePorts = ($options{'i'} or $ignorePorts or '[23, 12000]');
	my $portList = eval($ignorePorts);
	if (ref($portList) eq 'ARRAY')	{
		@ignorePorts=@{$portList}}
	else {$ignorePorts[0]=$ignorePorts};#was just a single number 
										#entered on the command line

	if ($options{'n'}) {
		my $countURL = eval($options{'n'});
		#anything but an array reference (including text that does not 
		#even parse) is a mistake the user should hear about
		if (ref($countURL) ne 'ARRAY') {
			die "count specification needs to be [start,finish]"
			};
		@countURL=@{$countURL}
		};
	#without -n, a range set in the config file is left untouched
	return;
}

# usage()
#
# Shows the command line help and ends the program. The text is raised 
# with die, so it goes to STDERR and the exit status is non-zero.
# -i takes a value and -w is on by default on MS Windows; the help text 
# says so.
sub usage	{	# command line options
    die <<"EOT";
Usage: $progname [-options] <url>
    -f <file>       Use a configuration file (command line options still 
                    override)
    -c <country>    Set country to test proxies for (default UAE)
    -i <ports>      Ignore proxies on these ports: one port, or a list 
                    like [23,12000] (default [23, 12000])
    -t <timeout>    Set timeout value (default 60 secs)
    -p <proxyurl>   URL for the proxy to use to get the proxy list, 
                    or 'none' (default a UAE proxy)
    -v              Show program version
    -h              Print this message
    -q              Quiet - don't list the proxy list, and no beeps etc.
                    (default noisy)
    -C              Use CONNECT to test each proxy in the list (default off)
    -P <proxyurl>   Use CONNECT via this proxy
                    Note: if no -p proxy is specified, the default 
                    for -C is a UAE CONNECT proxy
    -w              On completion, wait for a GUI user to close 
                    the window (default: wait on MS Windows only)
    -n [start,end]  Check a range of URLs (increment %COUNT% in the url)
EOT
}

__END__

=head1 NAME

findProxy

=head1 SYNOPSIS

findProxy [-f <config file>] [-c <country>] [-t <timeout>] [-i [port, ...]] 
[-h] [-v] [-q] [-w] [-p <url>] [-C] [-P <url>] [-n [start,end]] <url>

=head1 OPTIONS

=over 4

=item -f <file>       Use a configuration file to set some of the options 
                      (and some of the internal variables)
                      Note that all can be set, and they will not override 
                      any command line options specified. 

=item -c <country>    Set country to test proxies for (default=UAE)

=item -i <ports>      Ignore proxies on these ports (default [23, 12000])
    
=item -t <timeout>    Set timeout value (default 60 secs)
    
=item -p <proxyurl>   URL for the proxy to use to get the proxy list, 
                      or 'none' (default a UAE proxy)
                     
=item -q              Quiet - don't list the proxy list, and no beeps etc.
                      (default noisy)

=item -C              Use CONNECT to test each proxy in the list (default off)

=item -P <proxyurl>   CONNECT via this proxy.
                      
=item -w              On completion, wait for a GUI user to close 
                      the window (default no wait)
                     
=item -n [start,end]  Check a range of URLs (increment %COUNT% in the 
                      url) from <start> to <end>

=item -h help         Prints out a brief help message.

=item -v version      Prints program version number

=back

=head1

=head1 DESCRIPTION

This program will run anywhere (where the Perl module LWP is installed) 
and find web proxies for you to use. It will get the list of proxies 
to check from a file, or from the web (you specify a URL in both cases)

The proxies it finds acceptable have the following properties:

=over 4

=item * they are accessible from where the program is run (not port, or 
address blocked)
  
=item * they are listening/alive

=item * they are not access-controlled for you

=item * they *do* proxy properly
 
=item * they are uncensored (I use http://www.sex.com/ as a test, and 
assume that if this is not censored by the proxy then there 
is no censorship in place).
 
=item * they are at least as fast as you specify (the speed measured 
depends on where and when you run this program, of course).

=back

I think proxy-hunting is, in the end, a waste of time, but many 
wanted this, so here it is. It's been a lot of fun writing it!
If you want to become independent of all this scrabbling for 
proxies, try a shell account.

=head1 REQUIREMENTS:

=over 4

=item * a system with perl (http://www.activestate.com/ for win32 systems, 
http://www.perl.org/ for linux)

=item * perl module LWP installed

=item * perl module Net::DNS (optional, but will produce more useful 
results)

=back

=head1

=head1 VERSION RELEASE HISTORY

=over 4

=item * 00/10/5 v1.0 released

=item * 00/10/6 v1.1 released: enough small bugfixes, new formats handled 
etc. to justify a new release already.

=item * 00/10/8 v1.2 released: more list formats handled, 'counting' URLs, 
URL timeout=$maxTime added (but see the note under 'problems'), 
beeps on each successful proxy, 2 beeps to finish.

=item * 00/10/13 v1.3 released: formatting fixed up some, some minor bugs 
squashed, unix line length problems fixed, major cleanup, pause 
at end for windows users, documentation improved, URL timeout now 
almost works :-), ports to ignore added.

=item * 00/11/7 v1.4 released: renamed this program from findproxyNew to 
findProxy. The old findProxy was renamed to findProxyVlad (because 
that's what it was originally for, and what it does best). 
For searches of Vlad's database, findProxyVlad does a lot better 
than this program.

=item * 00/11/16 v1.5 released: added all the command line option code
and CONNECT capability. The command options override anything set
by editing the program file itself; the CONNECT capability allows 
checking of blocked proxies via a local tunnelling proxy.
Either because the support is not there, or because I'm thick, 
I had to modify an LWP file to get CONNECT to work. It's http.pm 
and must be downloaded with this version of findProxy and used to 
replace the original one. Functions other than CONNECT will work 
without this.

=item * 00/11/23 v1.51 released: added the perl pod stuff, so now 
you can produce a pretty html page with the links and instructions 
about running this program. Added a couple of new URLs to proxy lists.

=item * 00/11/30 v1.52 released: minor cleanups, handle new proxy 
list formats

=item * 00/12/7 v1.53 released: minor change because the reference 
page changed its text. Also fixed a bug which caused an environment 
specified proxy to be used for file:// protocol accesses. This version 
is available at: 
http://www.angelfire.com/wy/1waynes/ and 
http://mojonation.net/id/3z7y9LMigGAmzvevgoxvtn5Et98/

=item * 00/12/7 v1.54: minor fixes. -p none option 
allowed now. Groks an additional format, or two. Documented the need 
to place quotes around command line URLs with shell metacharacters.

=item * 00/12/12 v1.55 released: minor fixes - case sensitivity of -c 
value relaxed. Now prints all the fqdns in the summary at the end.

=item * 00/12/17 v1.56 released: allows a separate 
specification of the CONNECT proxy to use. Reference page changed 
again - made the key string one of their search engine keywords 
'fucking'. That should always be there :-) 
Changes to result printout.
Changes to DNS server used (defaults to UAE if country is UAE, 
alter.net if not). This might slow some (e.g. KSA) down, but at 
least it guarantees the fqdn lists.

=item * 00/12/21 v1.6 released: fixes v1.56 insistence on a 
value for -C option, by adding -P <connect proxy url> option. 
Removed the required value for -C. Fixed bad ordering for defaulted 
CONNECT proxy choice. Added the configuration file option and some 
example config files.

=item * 00/12/22 v1.61: groks statProxy output format.

=item * 01/1/3 v1.62: changed the way command line options, config 
line options and code defaults were resolved. No reports, but it 
probably didn't work as expected in some cases before.

=back

=head1

=head1 TO RUN IT

=over 4

=item 1. edit the program (if necessary) to include different default 
values. I've included sensible defaults for the UAE, but KSA etc. will 
need to specify things (the proxy to use to get the list, for example) 
on the command line, or by editing the code.

=item 2. start it from the command shell by typing:

 perl findProxy.pl [options] <proxyListURL>
 or
 perl findProxy.pl [options] <proxyListURL> >file.txt
 
 Note that any shell metacharacters in your URL need to be escaped, 
 or the URL will get munged by the shell before findProxy even sees 
 it. An example of such a character is &. You can play safe by 
 enclosing the URL in double quotes in MS Windows or single quotes 
 in Unix; or just escape each special character in the URL by preceding 
 it with ^ in MS Windows or \ in Unix. Spaces in your URL will also 
 require this special handling.
 
=item 3. print this documentation in a pretty format by typing:

 perldoc findProxy.pl
 
 and maybe convert it to html, but this doesn't work yet! :-)
 
 perldoc findProxy.pl | pod2html > findProxy.html

=back

=head1 TO DO

=over 4

=item * check each proxy for HTTPS and CONNECT capability (to which ports? 
443, 119?)

=item * make the proxies get a fresh copy of the ref web page, so the timing 
is not affected by caching

=item * check that the proxy given in $useProxy actually works first!

=item * do the GETs in parallel to speed things up.
to do this kind of thing for multiple proxies to a single url, 
need to do one of these:

=over 4

=item 1. make a bunch of ua's and set a proxy for each (ie not using 
the parallel package at all)

=item 2. use a callback at the beginning of each connection which 
sets the proxy. 

=back

=item will either of these work?

=item * use HTML::Parser instead of the mess with regexes.

=item * check proxies for anonymity. Need a local daemon which just 
passes the request to findProxy parsing code. This will work for 
prisonwalls and free locations; not for real firewalls or NATs.
For them, I'd need to send a request to a proxy env checker and 
parse the response.

=item * handle cookies (egroups messages, for example)

=back

=head1

=head1 KNOWN PROBLEMS

=over 4

=item * The URL timeout doesn't work well in Windows (maybe Windows 
signal handling fails - no surprise there). 
In particular, this means that the program takes about 2*$maxTime 
(sometimes) to get past checking squirrel.owl.de:33434, even though 
the timeout is set to $maxTime.

=item * The proxy list format varies from place to place, so it's hard 
to handle in a general way. Tell me the URL of a proxy list you think 
it doesn't handle and I'll add it in.
The way to tell that this is happening is to run with '-c free'.
If the program doesn't say 'testing' for even one 
proxy and you can see one or more address:port in the list which 
should have been tested, then the program has probably screwed up.

=back

=head1

=head1 VARIABLES INSIDE THE CODE

You must specify at least $proxyList if you want any 
useful results.

$country: the country where this program is to run; should be one 
of: 'UAE', 'KSA', or 'free'. Use 'free' if you want to run 
this from an unblocked account (e.g. Panix, plaguesplace etc.).
If you don't know which ports are blocked for you, run this with 
$country='free' and the program will still find useable 
proxies for you, but it will take a *very* long time to do it 
($maxTime for each blocked proxy in the list!).

$useProxy: add a proxy only if necessary - this proxy must be 
accessible from wherever this program is run. It's not 
necessary at all if the program is run from an account in 
a free country (it's only needed to get the initial list 
of proxies to check). If $proxyList is a 'file://' URL, it 
is not needed either. If $proxyList is not censored from 
where you are, you may use the local proxy for speed 
(note that they will see you accessing the initial list; 
they won't see anything else in their proxy logs).

$proxyList: the URL for the initial list of proxies to check. 
This may be a web URL, or a file URL.
If it's a web URL, then the program has the same problems 
(or not) that your browser has in getting the list. 
That is, if you need to configure a proxy into your browser 
(as in the UAE, KSA etc.), then you *must* set a good value 
for $useProxy. It may be ok for many of the web lists of 
proxies, if you just use the local ISPs proxy (since most of 
these lists are not blocked anyway).
If $proxyList is a 'file://' URL, the list of proxies to check 
is on your local disk, so no proxy is needed. If you are 
running this on a Unix system, this URL will probably need to 
contain the full path to your local file.
Note also that the format of the list of working proxies 
produced on completion of this program is a format which is 
understood by this program, so you can run this program 
periodically to recheck them.

$useConnect: specifies that the $useProxy is able to CONNECT to 
the various proxies to be checked. $useProxy will be 
used to check the proxies (via a CONNECT operation). 
This option is only of use if you plan to use CONNECT (through 
$useProxy) to talk to the proxies you find (for example, if 
you plan to use HTTPort to browse).

@countURL: set this to the range to count if you want a URL to be 
counted. For example, there might be a list at 
http://some.site.com/list10.html and at 
http://some.site.com/list11.html ... 
http://some.site.com/list67.html. In this example, set 
the URL to http://some.site.com/list%COUNT%.html and 
@countURL=(10,67) as shown below. If there are leading zeros
(say it starts at 01, and goes to 67), then you must 
do the first range separately (as @countURL=(1,9)). 
This is good for checking proxies from bulletin board messages 
etc. Make the counter go through each message (assuming they're 
numbered).

$maxTime: The program takes a few seconds to download the reference 
web page (www.sex.com). If it takes longer than $maxTime, 
the proxy will be considered unsuitable. If you're really 
having trouble finding suitable proxies, make this very high 
- say 60 seconds. The program may take a long time to check 
each proxy though.
Some of the ones that have been around a 
long time are still working, but take more than a minute to 
get a page. If you want to see these in your 'working' list, 
make $maxTime even higher - I think this is a waste of time 
though, you could never browse with such proxies. A couple 
on the multi-proxy data list are like this.
	Note that this time is only good the first time you run the 
program because, after that, www.sex.com might be cached at 
the proxy and should come back faster. I experimented with 
options to make the proxy get a fresh page, and will include 
some of this later.

$quiet: If you've seen the program working before and you don't 
want to see the 'testing' messages, you don't want to hear 
the single beeps, etc., set this to 1. Otherwise to 0.

$pauseOnCompletion: set this if you don't want the program to 
exit (mainly so that people can double-click this thing 
and keep the command window open at the end).

@ignorePorts: Many lists seem to include port 23 (telnet) proxies.
I've never seen one which works for web browsing, and they take a 
long time to timeout, so you can set this if you see them.
Same for Socks (port 1080). You can also use this if you are 
not in one of the countries known to the program - just set 
country to 'free' and @ignorePorts to all the ports you know 
are blocked for you.


=head1 PROXY LIST URLS

These urls contain lists of proxies worth testing (most are 
full of rubbish, but they needed to be tested once anyway).
You can copy these from the perldoc output (without the quotes) 
and use them on the findProxy command line. Note - I've broken 
the long ones up in a way suitable for including in the code; to 
use these from the command line, leave out the '.' (remove the 
quote and dot from the end of the first of the two lines, remove 
the quote from the beginning of the next line) and make them 
one line.

check the proxies listed in a local file (no proxy needed)
$proxyList='file://k:/toCheck.txt';	
$proxyList='file://K:\checkedProxies.html';

$proxyList=
'file://K:\wayne\webSites\www.angelfire.com.wy.1waynes\checkedProxies.html';

$proxyList='file://k:/test1.html';	

check the old list on my site
$proxyList='http://www.angelfire.com/wy/1waynes/0proxies.html'; 

check the 'checked' list on my site
$useProxy='http://proxy1.emirates.net.ae:8080/';
$proxyList='http://www.angelfire.com/wy/1waynes/checkedProxies.html';

check my local proxy.pac proxies :-)
$proxyList='file://c:/inetpub/wwwroot/proxy.pac';	

 not one of these is good
$proxyList='http://proxys4all.cgi.net/public.shtml';

 so much for the 'top ten' - not one is good
$proxyList='http://proxys4all.cgi.net/topten.shtml';

$proxyList='http://people.freenet.de/cybatron/ProxySammlung.txt'; 
@ignorePorts=(23);	#cybatron has many port 23 proxies - ignore them

MVlad's list of non-standard port proxies - not one good proxy here!
$proxyList='http://proxies.hotmail.ru/nonstand.htm'; 

the multi-proxy 'data' file. Not one useful proxy here. Some appear 
as 'working' only if $maxTime is set to 120 seconds!
$proxyList='http://proxy.nikto.net/all_list.htm'; #nothing

no proxies working here.
$proxyList='http://members.tripod.com/ex-online/proxy.htm'; 

hey! I got one here.
$proxyList='http://poisk.hypermart.net/cgi-bin/proxies.cgi'; 

$proxyList='http://www.cl.spb.ru/sparta/list3.htm'; #nothing
$proxyList='http://proxylist.virtualave.net/plfull.htm'; #nothing.

one here
$proxyList='http://www.fortunecity.com/skyscraper/millenit/1069/'; 

$proxyList='http://www.403-security.org/Resources/proxys.htm'; #none
$proxyList='http://poisk.hypermart.net/cgi-bin/proxies.cgi'; #one
$proxyList='http://www.coolrunning.tmfweb.nl/list1.html'; #none

 none here
$proxyList=
'http://www.geocities.com/SiliconValley/Network/1120/fp-anonim.html'; 

$proxyList='http://allfreeweb.hypermart.net/cgi-bin/dbase/'.
'db.cgi?db=proxy&userid=user&pw=user&login=LogonEnabled=Yes&so='.
'ascend&view_records=1&enabled=yes&output=min&mh=2000&sb=3&ID=*'; # none

$proxyList='http://www.winsqueeze.com/nppp2/pppplus.cgi/http/www'.
'.cyberarmy.com/lists/proxy/'; # cyberarmy list, no proxies found

$proxyList='http://www8.big.or.jp/~000/CyberSyndrome/'; #quite a few here

 none here
 $proxyList='http://www.thescream.f2s.com/html/support/proxylst.htm';

=head2 Sites requiring a special proxy (from the UAE, at least)

 $useProxy='http://194.95.207.2:8000/'; 

 Not even one good proxy here - why is it blocked???
 $proxyList='http://www.cyberarmy.com/lists/proxy/'; 

 nothing!
 $proxyList='http://www.hackzone.ru/member/nethack/proxies.htm'; 

 nothing
 $proxyList='http://evy.8m.com/proxy.htm';

 nothing
 http://dinfo.org/cdjp/Intro/proxy012599
 note there is a date attached to this one, there are probably 
 more recent scans there somewhere.

=head2 Sites requiring 'counting' URLs.

Vlad's back! findProxyVlad is better for this, but let's 
try here. Good for a max of 50 records (add multiples of 50 
to nskip to get the next sets). These limitations 
don't apply in findProxyVlad. Counting helps here.

 $proxyList='http://tools.rosinstrument.com/cgi-bin/sps.pl?pattern='.
 '%3B100-09-%COUNT%&max=50&nskip=0&file=proxlog.csv';
 @countURL=(10,30);

i only tried 14-17
 @countURL=(14,17);
 $proxyList='http://proxyworld.hypermart.net/%COUNT%.txt'; 

 $proxyList='http://ssdd.virtualave.net/all%COUNT%.html';
 my @countURL=(65,74);

 $proxyList = 'http://proxys4all.cgi.net/wwwboard/messages/%COUNT%.html';
 my @countURL=(4000,4159); #nothing

=head2 To try in future:

 
 
 
 http://members.tripod.com/~webmaster5/proxylist.txt # was easier 
 to edit first. no useful proxies there anyway
 
=head1 AUTHOR

wayne@nym.alias.net (http://www.angelfire.com/wy/1waynes/)

=cut

