First pass at adding key files
This commit is contained in:
		
							
								
								
									
										955
									
								
								site/slowtwitch.com/cgi-bin/articles/admin/GT/URI/HTTP.pm
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										955
									
								
								site/slowtwitch.com/cgi-bin/articles/admin/GT/URI/HTTP.pm
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,955 @@
 | 
			
		||||
# ==================================================================
 | 
			
		||||
# Gossamer Threads Module Library
 | 
			
		||||
#
 | 
			
		||||
#   GT::URI::HTTP
 | 
			
		||||
#   Author  : Aki Mimoto (support@gossamer-threads.com)
 | 
			
		||||
#   CVS Info : 087,071,086,086,085      
 | 
			
		||||
#   $Id: HTTP.pm,v 1.30 2002/06/27 18:36:02 aki Exp $
 | 
			
		||||
#
 | 
			
		||||
# Copyright (c) 2000 Gossamer Threads Inc.  All Rights Reserved.
 | 
			
		||||
# ==================================================================
 | 
			
		||||
#
 | 
			
		||||
# Description:
 | 
			
		||||
#   Gets HTTP data
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
package GT::URI::HTTP;
 | 
			
		||||
# ===============================================================
 | 
			
		||||
    use strict;
 | 
			
		||||
    use GT::Socket;
 | 
			
		||||
    use GT::Base;
 | 
			
		||||
    use GT::CGI;
 | 
			
		||||
    use Exporter ();
 | 
			
		||||
    use vars     qw/$ATTRIBS $EOL @ISA $DEBUG @EXPORT_OK $BASE_PORT /;
 | 
			
		||||
 | 
			
		||||
    @ISA    = ('GT::Base', 'Exporter');
 | 
			
		||||
 | 
			
		||||
    $EOL    = "\015\012";
 | 
			
		||||
    $DEBUG  = 0;
 | 
			
		||||
 | 
			
		||||
    $ATTRIBS = {
 | 
			
		||||
        URL              => '',
 | 
			
		||||
        request_method   => 'GET',
 | 
			
		||||
        parameters       => {},
 | 
			
		||||
        agent_name       => 'Mozilla/4.73 [en]',
 | 
			
		||||
        agent_host       => 'X11; I; Linux 2.2.15-4mdk i586',
 | 
			
		||||
        headers          => {},
 | 
			
		||||
        cookies          => {},
 | 
			
		||||
        resource_attribs => {},
 | 
			
		||||
        resource_data    => undef,
 | 
			
		||||
        max_down         => 200000,
 | 
			
		||||
        debug            => 0
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    @EXPORT_OK = qw/ parse_url deparse_url build_path build_parameters /;
 | 
			
		||||
 | 
			
		||||
sub pending {
 | 
			
		||||
#-------------------------------------------------------------------------------
 | 
			
		||||
# return true if there is some data to be picked up
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $tics    = shift;
 | 
			
		||||
    my $sock    = $self->{'sock'} or return 1;
 | 
			
		||||
    return $sock->pending();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub EOF {
 | 
			
		||||
#-------------------------------------------------------------------------------
 | 
			
		||||
# return true if the system has finished downloading
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $sock    = $self->{'sock'};
 | 
			
		||||
    return ( $sock and $sock->fh() ? $sock->EOF() : 1 );
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub gulp_read {
 | 
			
		||||
#-------------------------------------------------------------------------------
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $tics    = shift;
 | 
			
		||||
    return $self->do_iteration( $tics );
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub resource_attrib {
 | 
			
		||||
#-------------------------------------------------------------------------------
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $attrib  = shift;
 | 
			
		||||
    my $attribs = $self->{resource_attribs} || {};
 | 
			
		||||
    return $attrib ? $attribs->{$attrib} : $attribs;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub do_iteration {
 | 
			
		||||
#-------------------------------------------------------------------------------
 | 
			
		||||
# useful in a loop, this is a generally non blocking way to get a page
 | 
			
		||||
# it will return nulls until it gets the page (and returns the page)
 | 
			
		||||
#
 | 
			
		||||
    my ( $self, $fh, $sock, $pause );
 | 
			
		||||
    $self  = shift;
 | 
			
		||||
    $pause = shift;
 | 
			
		||||
    $sock  = $self->{'sock'};
 | 
			
		||||
 | 
			
		||||
# if we haven't done the request yet
 | 
			
		||||
    if ( not( $self->{'resource_downloaded'} or $sock ) and $self->{URL} ) {
 | 
			
		||||
        $self->fetch();
 | 
			
		||||
        $sock = $self->{'sock'};
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# if we can't connect
 | 
			
		||||
    if ( $self->{'flag_err'} ) {
 | 
			
		||||
        return '';
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    $sock->fh() or return;
 | 
			
		||||
 | 
			
		||||
# only if the resource hasn't been downloaded yet..,
 | 
			
		||||
    if ( !$self->{'resource_downloaded'} ) {
 | 
			
		||||
 | 
			
		||||
# if there's data pending, download it!
 | 
			
		||||
        if  ( $sock->pending( $pause ) ) {
 | 
			
		||||
# get as much data as we can
 | 
			
		||||
            while ( my $buf = $sock->gulpread() ) {
 | 
			
		||||
                $self->{'resource_data'} .= $buf;
 | 
			
		||||
            }
 | 
			
		||||
# if we need to parse the server headers
 | 
			
		||||
            $self->_parse_server_headers();
 | 
			
		||||
 | 
			
		||||
        }
 | 
			
		||||
# we've run out of data, let's stop
 | 
			
		||||
        if ( $sock->EOF() ) {
 | 
			
		||||
 | 
			
		||||
# make sure that the object know
 | 
			
		||||
            $self->{'resource_downloaded'} = 1;
 | 
			
		||||
 | 
			
		||||
            return $self->{'resource_data'};
 | 
			
		||||
        }
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
    else {
 | 
			
		||||
        return $self->{'resource_data'};
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
sub fetch {
 | 
			
		||||
#-------------------------------------------------------------------------------
 | 
			
		||||
# fetches a page and handles CGI if requested
 | 
			
		||||
#
 | 
			
		||||
    my ( $self, $url, $parameters, $sock, $request_str, $host, $port, $dirs, $file, $params, $fh, $order );
 | 
			
		||||
 | 
			
		||||
    $self       = shift;
 | 
			
		||||
    $url        = shift || $self->{'URL'} or return undef;
 | 
			
		||||
    $parameters = shift || $self->{'parameters'};
 | 
			
		||||
 | 
			
		||||
# save the headers
 | 
			
		||||
    $self->{'URL'}  = $url;
 | 
			
		||||
    $self->{'parameters'}   = $parameters;
 | 
			
		||||
 | 
			
		||||
    ( $host, $port, $dirs, $file, $params, $order ) = parse_url($url);
 | 
			
		||||
 | 
			
		||||
# see if we can connect to the host
 | 
			
		||||
    $sock = GT::Socket->open({
 | 
			
		||||
        'host'      => $host,
 | 
			
		||||
        'port'      => $port,
 | 
			
		||||
        '_debug'    => $self->{_debug},
 | 
			
		||||
        'debug'     => $self->{debug},
 | 
			
		||||
        'max_down'  => $self->{max_down}
 | 
			
		||||
    });
 | 
			
		||||
 | 
			
		||||
# submit the request to the host
 | 
			
		||||
    if ( $sock ) {
 | 
			
		||||
        $fh = $sock->fh();
 | 
			
		||||
        $self->debug( "Connected to $url" ) if ($self->{_debug});
 | 
			
		||||
        $request_str = $self->_create_request( $url, $parameters, $host, $port, $dirs, $file, $params, $order );
 | 
			
		||||
        $self->debug( "Sent request:\n$request_str" ) if ($self->{_debug});
 | 
			
		||||
        print $fh $request_str;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    else {
 | 
			
		||||
        $self->debug( "Couldn't connect to $url. Reason: $GT::Socket::error" ) if ($self->{_debug});
 | 
			
		||||
        $self->{resource_attribs} = {
 | 
			
		||||
            ERROR_CODE      => 'CANTCONN',
 | 
			
		||||
            ERROR_MESSAGE   => 'Cannot connect to server. Reason: $GT::Socket::error'
 | 
			
		||||
        };
 | 
			
		||||
        $self->{resource_downloaded} = 1;
 | 
			
		||||
        $self->{flag_err}            = 1;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# store the socket
 | 
			
		||||
    return( $self->{'sock'} = $sock );
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _parse_server_headers {
 | 
			
		||||
#-------------------------------------------------------------------------------
 | 
			
		||||
# this next part parses the server headers
 | 
			
		||||
#
 | 
			
		||||
    my ( $self, $doc, $fh, $sock, $line, $error_code, $error_message, %resource_attribs, @lines );
 | 
			
		||||
    $self   = shift;
 | 
			
		||||
 | 
			
		||||
    $doc    = shift || $self->{'resource_data'};
 | 
			
		||||
 | 
			
		||||
    if ( !$self->{'headers_parsed'} ) {
 | 
			
		||||
 | 
			
		||||
# check is there is a full header yet
 | 
			
		||||
        ( $doc =~ s/((.*?))(\012\015\012\015|\015\012\015\012|\015\015|\012\012)//so ) or return undef;
 | 
			
		||||
        $line  = $1;
 | 
			
		||||
        $self->{'resource_data'}    = $doc;
 | 
			
		||||
        $line  =~ s/(\012\015|\015\012)/\n/g;
 | 
			
		||||
 | 
			
		||||
# now get all the lines of the header..,
 | 
			
		||||
        @lines = grep { not /^(\n|\r)+$/ } split /(\n|\r)/, $line;
 | 
			
		||||
 | 
			
		||||
# read the first header, which is the status line
 | 
			
		||||
        $line  = shift @lines;
 | 
			
		||||
        ($error_code, $error_message) = $line  =~ /[^ ]*[ ]*(\d*)(.*)/;
 | 
			
		||||
 | 
			
		||||
# loop until we're done the headers
 | 
			
		||||
        while ( $line = shift @lines  ) {
 | 
			
		||||
            if ( $line =~ /^(.*?): (.*)/ ) {
 | 
			
		||||
                $resource_attribs{lc($1)} = $2;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        $resource_attribs{ERROR_CODE} = $error_code;
 | 
			
		||||
        $resource_attribs{ERROR_MESSAGE} = $error_message;
 | 
			
		||||
 | 
			
		||||
# now parse cookies since we're dealing in http
 | 
			
		||||
        if ( $resource_attribs{'set-cookie'} ) {
 | 
			
		||||
            $resource_attribs{'COOKIE'} = _parse_cookie_string( $resource_attribs{'set-cookie'} );
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
# add the headers to the local object
 | 
			
		||||
        $self->{'resource_attribs'} = \%resource_attribs;
 | 
			
		||||
 | 
			
		||||
# now set the headers parsed flag
 | 
			
		||||
        $self->{'headers_parsed'}   = 1;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return \%resource_attribs;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _parse_cookie_string {
 | 
			
		||||
#-------------------------------------------------------------------------------
 | 
			
		||||
    my $str      = shift;
 | 
			
		||||
    my @cookie   = grep $_, map { s/^\s*(.*?)\s*$/$1/; $_ } split /[;]/, $str;
 | 
			
		||||
 | 
			
		||||
# as first element of the cookie is a hash
 | 
			
		||||
    my $value    = shift @cookie;
 | 
			
		||||
 | 
			
		||||
    my %segments = map { /^([^=]*)=(.*)$/ ? ( lc($1) => $2 ) : ( $_ => undef ) } @cookie;
 | 
			
		||||
 | 
			
		||||
    if ( $value =~ /^([^=]*)=(.*)$/ ) {
 | 
			
		||||
        $segments{name}     = GT::CGI::unescape($1);
 | 
			
		||||
        $segments{value}    = GT::CGI::unescape($2);
 | 
			
		||||
    }
 | 
			
		||||
    else {
 | 
			
		||||
        $segments{name}     = GT::CGI::unescape($value);
 | 
			
		||||
        $segments{value}    = undef;
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    return \%segments;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _hash_to_phash {
 | 
			
		||||
#-------------------------------------------------------------------------------
 | 
			
		||||
# creates a parameter hash from a standard hash
 | 
			
		||||
    my $hash    = shift or return undef;
 | 
			
		||||
 | 
			
		||||
    my $phash   = {
 | 
			
		||||
        map {
 | 
			
		||||
            $_ => [ $$hash{$_} ]
 | 
			
		||||
        } keys %{$hash}
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    return $phash;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _combine_phash {
 | 
			
		||||
#-------------------------------------------------------------------------------
 | 
			
		||||
    my ( $a, $b ) = @_;
 | 
			
		||||
    foreach my $key ( %$b ) {
 | 
			
		||||
        my $container = $a->{$key};
 | 
			
		||||
        if ( ref $container ) {
 | 
			
		||||
            push @{$container}, @{$b->{$key}};
 | 
			
		||||
        }
 | 
			
		||||
        else {
 | 
			
		||||
            $a->{$key} = [ $container, @{$b->{$key}} ];
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    return $a;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _create_request {
 | 
			
		||||
#-------------------------------------------------------------------------------
 | 
			
		||||
# creates the http request to be submitted to the server
 | 
			
		||||
    my ( $sock, $fh, $get_str, $req, $headers, $method, $tmp );
 | 
			
		||||
 | 
			
		||||
    my ( $self, $url, $parameters, $host, $port, $dirs, $file, $params, $order ) = @_;
 | 
			
		||||
 | 
			
		||||
    $method     = lc($self->{'request_method'});
 | 
			
		||||
 | 
			
		||||
# now handle the request
 | 
			
		||||
    $get_str    = uc($method) . ' /' . build_path($dirs,$file);
 | 
			
		||||
 | 
			
		||||
# if this is a get request, if there are any parameters we want to pass, handle that
 | 
			
		||||
    if ( $method eq 'get' ) {
 | 
			
		||||
        $tmp    = build_parameters( _combine_phash( $params, _hash_to_phash($parameters) ), $order );
 | 
			
		||||
        $get_str.= $tmp ? "?$tmp" : "";
 | 
			
		||||
    }
 | 
			
		||||
    $get_str    .= " HTTP/1.0$EOL";
 | 
			
		||||
 | 
			
		||||
# handle the host field...,
 | 
			
		||||
    $get_str    .= "Host: $host" . ( ( $port == 80 ) ? "" : ":$port" ) . $EOL;
 | 
			
		||||
 | 
			
		||||
# handle the useragent field
 | 
			
		||||
    $get_str    .= "User-Agent: $self->{agent_name} ($self->{agent_host})" . $EOL;
 | 
			
		||||
    $get_str    .= "Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/png, */*" . $EOL;
 | 
			
		||||
    $get_str    .= "Accept-Language: en" . $EOL;
 | 
			
		||||
    $get_str    .= "Accept-Charset: iso-8859-1,*,utf-8" . $EOL;
 | 
			
		||||
    $get_str    .= "Connection: close" . $EOL;
 | 
			
		||||
 | 
			
		||||
# handle cookies
 | 
			
		||||
    if ( my $cookiestr = $self->_create_cookie_request() ) {
 | 
			
		||||
        $get_str    .= $cookiestr . $EOL;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# handle any extra headers
 | 
			
		||||
    $headers    = $self->{'headers'};
 | 
			
		||||
    $get_str    .= join(
 | 
			
		||||
        "",
 | 
			
		||||
        map {
 | 
			
		||||
            $_ = ucfirst;
 | 
			
		||||
            "$_: $$headers{$_}$EOL";
 | 
			
		||||
        } keys %{$headers}
 | 
			
		||||
    );
 | 
			
		||||
 | 
			
		||||
# handle if the request is a post
 | 
			
		||||
    if ( $method eq 'post' ) {
 | 
			
		||||
        my $request = build_parameters( { %{$params}, %{_hash_to_phash($parameters)} } );
 | 
			
		||||
        $get_str .=
 | 
			
		||||
            "Content-type: application/x-www-form-urlencoded$EOL" .
 | 
			
		||||
            "Content-length: " .length($request)."$EOL$EOL".
 | 
			
		||||
            $request;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    else {
 | 
			
		||||
        $get_str .= "$EOL$EOL";
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return $get_str;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub cookie {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub _create_cookie_request {
 | 
			
		||||
#-------------------------------------------------------------------------------
 | 
			
		||||
# creates the string the client sends to the server for a cookie
 | 
			
		||||
    my $self        = shift;
 | 
			
		||||
 | 
			
		||||
    my $cookies     = $self->{'cookies'};
 | 
			
		||||
    my $str         = join "; ", grep $_, map {
 | 
			
		||||
 | 
			
		||||
                                        my $value   = $cookies->{$_};
 | 
			
		||||
 | 
			
		||||
                                        if ( ref $value eq 'GT::CGI::Cookie'  ) {
 | 
			
		||||
                                            GT::CGI::escape($value->{-name}) . "=" . GT::CGI::escape( $value->{-value} );
 | 
			
		||||
                                        }
 | 
			
		||||
 | 
			
		||||
                                        elsif ( ref $value eq 'HASH' ) {
 | 
			
		||||
                                            GT::CGI::escape($value->{name}) . "=" . GT::CGI::escape( $value->{value} );
 | 
			
		||||
                                        }
 | 
			
		||||
 | 
			
		||||
                                        elsif ( $value and not ref $value ) {
 | 
			
		||||
                                            GT::CGI::escape($_) . "=" . GT::CGI::escape($value);
 | 
			
		||||
                                        }
 | 
			
		||||
 | 
			
		||||
                                    } keys %{$cookies};
 | 
			
		||||
 | 
			
		||||
    return $str ? "Cookie: $str" : "";
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub method {
 | 
			
		||||
#-------------------------------------------------------------------------------
 | 
			
		||||
# sets the acquisition method for parameter quering
 | 
			
		||||
#
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $method  = shift or return $self->{'form_method'};
 | 
			
		||||
 | 
			
		||||
    if ( lc $method eq 'get' ) {
 | 
			
		||||
        $self->{'form_method'} = lc $method;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    elsif ( lc $method eq 'post' ) {
 | 
			
		||||
        $self->{'form_method'} = lc $method;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    elsif ( lc $method eq 'head' ) {
 | 
			
		||||
        $self->{'form_method'} = lc $method;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return $self->{'form_method'};
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub load_parameter {
 | 
			
		||||
#-------------------------------------------------------------------------------
 | 
			
		||||
# loads the passed parameters into the database
 | 
			
		||||
    my ( $self, $params, $key, $parameters );
 | 
			
		||||
    $self   = shift;
 | 
			
		||||
    $params = ( ref $_[0] ? $_[0] : { @_ } ) or return undef;
 | 
			
		||||
    $parameters = $self->{'parameters'};
 | 
			
		||||
 | 
			
		||||
# add each entry into the object's attribs
 | 
			
		||||
    foreach $key ( keys %{$params} ) {
 | 
			
		||||
        $$parameters{ $key } = $$params{ $key };
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return $self->{'parameters'};
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub delete_parameter {
 | 
			
		||||
#-------------------------------------------------------------------------------
 | 
			
		||||
# deletes the named parameters
 | 
			
		||||
    my ( $self, $params, $key, $parameters );
 | 
			
		||||
    $self   = shift;
 | 
			
		||||
    $params = ref $_[0] ? $_[0] : [ @_ ];
 | 
			
		||||
    $parameters = $self->{'parameters'};
 | 
			
		||||
 | 
			
		||||
# now delete the parameters
 | 
			
		||||
    foreach $key ( @{$params} ) {
 | 
			
		||||
        delete $$parameters{$key};
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub parse_url {
 | 
			
		||||
#-------------------------------------------------------------------------------
 | 
			
		||||
# parses the url and converts it into host
 | 
			
		||||
# assume HTTP/1.0 format
 | 
			
		||||
#
 | 
			
		||||
my ( $url, $host, $port, $path, $file, $page, @dirs, $param, %params, $item, @order );
 | 
			
		||||
    $url    = shift;
 | 
			
		||||
 | 
			
		||||
# first, do the initial parse
 | 
			
		||||
    $url    =~ s/^[a-zA-Z]+:\/\///i;
 | 
			
		||||
    $url    =~ /([\w.\-]*):?(\d*)\/?([^?]*)\??(.*)/i;
 | 
			
		||||
 | 
			
		||||
    $host   = $1;
 | 
			
		||||
    $port   = $2 || 80;
 | 
			
		||||
    $path   = $3;
 | 
			
		||||
    $param  = $4;
 | 
			
		||||
 | 
			
		||||
# now handle the paths
 | 
			
		||||
    $path   =~ s/^\s*(.*?)\s*$/$1/;
 | 
			
		||||
 | 
			
		||||
# the paths are not simply directories
 | 
			
		||||
    if ( $path !~ /\/$/ ) {
 | 
			
		||||
        $path   =~ s/([^\/]*)$//;
 | 
			
		||||
        $file   = $1;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# handle the directories
 | 
			
		||||
    @dirs   = grep $_, split /\//, $path;
 | 
			
		||||
 | 
			
		||||
# and finally parse out the parameters if there are any
 | 
			
		||||
    foreach $item ( split /\&/, $param ) {
 | 
			
		||||
        if ( $item =~ /^(.*?)=(.*?)$/ ) {
 | 
			
		||||
            push @order, $1;
 | 
			
		||||
            push @{$params{$1}}, $2;
 | 
			
		||||
        }
 | 
			
		||||
        else {
 | 
			
		||||
            push @order, $item;
 | 
			
		||||
            push @{$params{$item}}, undef;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# now return the collection
 | 
			
		||||
    return ( $host, $port, \@dirs, $file, \%params, \@order );
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub deparse_url {
 | 
			
		||||
#-------------------------------------------------------------------------------
 | 
			
		||||
# takes a number of parameters and builds the request url
 | 
			
		||||
    my ( $url, $host, $port, $path, $file, $page, $dirs, $param, $params, $item );
 | 
			
		||||
 | 
			
		||||
    ( $host, $port, $dirs, $file, $params ) = @_;
 | 
			
		||||
 | 
			
		||||
    $url    = $host;
 | 
			
		||||
 | 
			
		||||
# build the host portion
 | 
			
		||||
    if ( $port != 80 ) {
 | 
			
		||||
        $url .= ":80";
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# now build the page access
 | 
			
		||||
    $url    .= "/" . build_path( $dirs, $file );
 | 
			
		||||
 | 
			
		||||
# and finally handle the parameters if any
 | 
			
		||||
    if ( $params and keys %{$params} ) {
 | 
			
		||||
        $url    .= "?" . build_parameters( $params );  
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return $url;
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub build_path {
 | 
			
		||||
#-------------------------------------------------------------------------------
 | 
			
		||||
# takes a dir array and builds a path
 | 
			
		||||
    my ( $path, $dirs, $page );
 | 
			
		||||
 | 
			
		||||
    $dirs   = shift;
 | 
			
		||||
    $page   = shift;
 | 
			
		||||
 | 
			
		||||
    if ( $dirs ) {
 | 
			
		||||
        $path   = join( "/", grep $_, @{$dirs} );
 | 
			
		||||
        $path   &&= "$path/";
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return $path .= $page;
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub build_parameters {
 | 
			
		||||
#-------------------------------------------------------------------------------
 | 
			
		||||
# takes a hash of parameters and builds a string with it
 | 
			
		||||
    my ( $params, $paramstr, @paramlist, $order );
 | 
			
		||||
 | 
			
		||||
    $params = shift or return undef;
 | 
			
		||||
    $order  = shift;
 | 
			
		||||
 | 
			
		||||
    if ( keys %{$params} ) {
 | 
			
		||||
 | 
			
		||||
        # Deep copy hash and array refs only.
 | 
			
		||||
        my $tmp;
 | 
			
		||||
        while (my ($k, $v) = each %$params) {
 | 
			
		||||
            if (! ref $v) {
 | 
			
		||||
                $tmp->{$k} = $v;
 | 
			
		||||
            }
 | 
			
		||||
            elsif (ref $v eq 'HASH') {
 | 
			
		||||
                $tmp->{$k} = {};
 | 
			
		||||
                foreach my $k1 (keys %{$params->{$k}}) { $tmp->{$k}->{$k1} = $params->{$k}->{$k1}; }
 | 
			
		||||
            }
 | 
			
		||||
            elsif (ref $v eq 'ARRAY') {
 | 
			
		||||
                $tmp->{$k} = [];
 | 
			
		||||
                foreach my $v1 (@{$params->{$k}}) { push @{$tmp->{$k}}, $v1; }
 | 
			
		||||
            }
 | 
			
		||||
            else { $tmp->{$k} = $v; }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if ( $order ) {
 | 
			
		||||
 | 
			
		||||
            foreach my $key ( @$order ) {
 | 
			
		||||
                my $value = shift @{$tmp->{$key} || []};
 | 
			
		||||
                if ( $tmp->{$key} and not @{$tmp->{$key}} ) {
 | 
			
		||||
                    delete $tmp->{$key}
 | 
			
		||||
                }
 | 
			
		||||
                $key = GT::CGI::escape( $key );
 | 
			
		||||
                push @paramlist, ( defined $value ? "$key=$value" : $key );
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
        } 
 | 
			
		||||
 | 
			
		||||
        require GT::CGI;
 | 
			
		||||
        $paramstr = join "&", ( @paramlist, 
 | 
			
		||||
                map {
 | 
			
		||||
                    my $key     = GT::CGI::escape($_);
 | 
			
		||||
                    my $list    = $tmp->{$_};
 | 
			
		||||
 | 
			
		||||
                    ( ref $list eq 'ARRAY' ) ?
 | 
			
		||||
                        join "&", 
 | 
			
		||||
                            map {
 | 
			
		||||
                                "$key" . ( defined $_ ? "=". GT::CGI::escape($_) : '' );
 | 
			
		||||
                            } @{$list}
 | 
			
		||||
                            : 
 | 
			
		||||
                        "$key=$list";
 | 
			
		||||
 | 
			
		||||
                    } keys %{$tmp} );
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return $paramstr;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub get {
 | 
			
		||||
#-------------------------------------------------------------------------------
 | 
			
		||||
# simple blocking method to get some data
 | 
			
		||||
    my ( $uri, $options, $HTTP, $doc );
 | 
			
		||||
 | 
			
		||||
    shift; # get rid of the first parameter
 | 
			
		||||
    $uri        = shift or return undef;
 | 
			
		||||
    $options    = ( @_ and ref $_[0] ) ? shift : {@_};
 | 
			
		||||
 | 
			
		||||
    $HTTP       = new GT::URI::HTTP($options);
 | 
			
		||||
    $HTTP->fetch($uri);
 | 
			
		||||
 | 
			
		||||
    while ( not defined( $doc = $HTTP->do_iteration(-1) ) ) {};
 | 
			
		||||
 | 
			
		||||
    return $doc;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
1;
 | 
			
		||||
 | 
			
		||||
__END__
 | 
			
		||||
 | 
			
		||||
=head1 NAME
 | 
			
		||||
 | 
			
		||||
GT::URI::HTTP - HTTP request broker.
 | 
			
		||||
 | 
			
		||||
=head1 SYNOPSIS
 | 
			
		||||
 | 
			
		||||
  use GT::URI::HTTP;
 | 
			
		||||
 | 
			
		||||
  print GT::URI::HTTP->get( "http://www.gossamer-threads.com" );
 | 
			
		||||
 | 
			
		||||
=head1 DESCRIPTION
 | 
			
		||||
 | 
			
		||||
GT::URI::HTTP makes requests and retrieves resources from http servers (not
 | 
			
		||||
limited to text). Can be used stand-alone or through GT::URI
 | 
			
		||||
 | 
			
		||||
=head1 Method List
 | 
			
		||||
 | 
			
		||||
Socket Handling
 | 
			
		||||
 | 
			
		||||
  sub pending()            Returns true if data awaiting
 | 
			
		||||
  sub EOF()                Returns open/closed status of socket
 | 
			
		||||
  sub gulp_read()          Alias to do_iteration
 | 
			
		||||
  sub do_iteration()       Basic looping function that downloads resources in the background
 | 
			
		||||
 | 
			
		||||
Acquisition
 | 
			
		||||
 | 
			
		||||
  sub fetch()              Tell the object which URL to acquire
 | 
			
		||||
  sub method()             The method of acquisition
 | 
			
		||||
  sub load_parameter()     Add a item for CGI parameters
 | 
			
		||||
  sub delete_parameter()   Delete a CGI parameter
 | 
			
		||||
  sub resource_attrib()    Headers related to resource and server
 | 
			
		||||
  sub get()                Simple resource aquisition function
 | 
			
		||||
 | 
			
		||||
Support Methods (must be imported)
 | 
			
		||||
 | 
			
		||||
  sub parse_url()          Decomposes a URL into constituent parts
 | 
			
		||||
  sub deparse_url()        Takes those parts and builds an URL  
 | 
			
		||||
  sub build_path()         Takes a list of directories and builds a path
 | 
			
		||||
  sub build_parameters()   Takes a hash of parameter->values and builds a CGI request string
 | 
			
		||||
 | 
			
		||||
=head1 Basics
 | 
			
		||||
 | 
			
		||||
=head2 Getting a resource, the simple way
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Just want a single item? Call GT::URI::HTTP->get and all the magic will be done
 | 
			
		||||
for you.
 | 
			
		||||
 | 
			
		||||
  use GT::URI::HTTP;
 | 
			
		||||
  my $buf = GT::URI::HTTP->get( "http://www.gossamer-threads.com/" );
 | 
			
		||||
 | 
			
		||||
Get based requests are permissable as well:
 | 
			
		||||
 | 
			
		||||
  use GT::URI::HTTP;
 | 
			
		||||
  my $buf = GT::URI::HTTP->get( "http://search.yahoo.com/bin/search?p=gossamer+threads" );
 | 
			
		||||
 | 
			
		||||
If extra options need to be set, simply append the options to the parameter
 | 
			
		||||
list, like follows.
 | 
			
		||||
 | 
			
		||||
  use GT::URI::HTTP;
 | 
			
		||||
  my $buf = GT::URI::HTTP->get( "http://search.yahoo.com/bin/search?p=gossamer+threads", { request_method => 'POST'  } );
 | 
			
		||||
 | 
			
		||||
=head2 When just the document is not enough
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
If a new GT::URI::HTTP object is instantiated, much more control is available,
 | 
			
		||||
including facilities for non-blocking downloading of pages.
 | 
			
		||||
 | 
			
		||||
To create a GT::URI::HTTP object, call new with all the options required:
 | 
			
		||||
 | 
			
		||||
  use GT::URI::HTTP;
 | 
			
		||||
 | 
			
		||||
  my $http = new GT::URI::HTTP(
 | 
			
		||||
 | 
			
		||||
                    # URL to acquire (optional)
 | 
			
		||||
                        'URL'               => '',
 | 
			
		||||
 | 
			
		||||
                    # Can also be set to POST/GET/HEAD (optional)
 | 
			
		||||
                        'request_method'    => 'GET',
 | 
			
		||||
 | 
			
		||||
                    # a hash of keys pointing to an arrayref of values to be sent to the server
 | 
			
		||||
                    # {
 | 
			
		||||
                    #    'key' => [ 'value1', 'value2'... ],
 | 
			
		||||
                    # }
 | 
			
		||||
                    # (optional)
 | 
			
		||||
                        'parameters'        => {},
 | 
			
		||||
 | 
			
		||||
                    # Name portion of the User-Agent: string the server acquires (optional)
 | 
			
		||||
                        'agent_name'        => 'Mozilla/4.73 [en]',
 | 
			
		||||
 | 
			
		||||
                    # Host-from portion of the User-Agent: string the server acquires (optional)
 | 
			
		||||
                        'agent_host'        => 'X11; I; Linux 2.2.15-4mdk i586',
 | 
			
		||||
 | 
			
		||||
                    # To prevent downloading of 80Tb files, but if you still wanted to, set this to 0 (optional)
 | 
			
		||||
                        'max_down'          => 200000
 | 
			
		||||
 | 
			
		||||
                    );
 | 
			
		||||
 | 
			
		||||
If URL has been specified in the options, for interactions with a CGI, you can
 | 
			
		||||
set extra parameters with $http->load_parameter(). Finally, loop on
 | 
			
		||||
$http->do_iteration() until the value is defined. To replicate the "simple get"
 | 
			
		||||
example:
 | 
			
		||||
 | 
			
		||||
  use GT::URI::HTTP;
 | 
			
		||||
  $|++;
 | 
			
		||||
 | 
			
		||||
  my $http = new GT::URI::HTTP(
 | 
			
		||||
 | 
			
		||||
                              URL        => 'http://search.yahoo.com/bin/search',
 | 
			
		||||
 | 
			
		||||
                              # can also use the following:
 | 
			
		||||
                              parameters => {
 | 
			
		||||
                                              'p' => [ 'gossamer threads' ]
 | 
			
		||||
                                            }
 | 
			
		||||
 | 
			
		||||
                              );
 | 
			
		||||
 | 
			
		||||
  my $doc;
 | 
			
		||||
  while ( not defined( $doc = $http->do_iteration() ) ) {
 | 
			
		||||
          # do something here while waiting for the resource to arrive 
 | 
			
		||||
          print "." 
 | 
			
		||||
          }
 | 
			
		||||
 | 
			
		||||
  print $doc, "\n\n";
 | 
			
		||||
 | 
			
		||||
Beyond the resource, the http server often supplies extra information in a
 | 
			
		||||
header. To access this information, use $http->resource_attrib().
 | 
			
		||||
 | 
			
		||||
Appending this code to the previous example, a list of all the associated server
 | 
			
		||||
headers can be seen:
 | 
			
		||||
 | 
			
		||||
  my $attribs = $http->resource_attrib();
 | 
			
		||||
  foreach my $key ( sort keys %{$attribs} ) {
 | 
			
		||||
    print "$key => $attribs->{$key}\n";
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
=head2 Support Methods
 | 
			
		||||
 | 
			
		||||
In addition to the basic fetching abilities, since the module must parse HTTP
 | 
			
		||||
URLs, the methods used to do so have been made public.
 | 
			
		||||
 | 
			
		||||
These methods decompose URLs into datastructures that make URLs easily studied
 | 
			
		||||
or modified and then reconstructed.
 | 
			
		||||
 | 
			
		||||
However, these routines have not been polished for useability so beware! The
 | 
			
		||||
following is a very basic example:
 | 
			
		||||
 | 
			
		||||
  use GT::URI::HTTP qw/ parse_url deparse_url build_path build_parameters /;
 | 
			
		||||
 | 
			
		||||
  # fragment the URL
 | 
			
		||||
  my ( $host, $port, $dirs, $file, $params ) = parse_url( 'http://www.gossamer-threads.com/perl/forum/showflat.pl?Cat=&Board=GosDisc&Number=113355&page=0&view=' );
 | 
			
		||||
 | 
			
		||||
  print "Parsed Data:\n\n";
 | 
			
		||||
  print "Host: $host\n";
 | 
			
		||||
  print "Port: $port\n";
 | 
			
		||||
  print "Dirs:\n";
 | 
			
		||||
  foreach my $dir ( @{$dirs} ) {
 | 
			
		||||
      print "  $dir/\n";
 | 
			
		||||
  }
 | 
			
		||||
  print "Resource Filename: $file\n";
 | 
			
		||||
  print "Params:\n";
 | 
			
		||||
  foreach my $key ( sort keys %{$params} ) {
 | 
			
		||||
    print "  $key: ";
 | 
			
		||||
    my $values    = ( $params->{$key} || {} );
 | 
			
		||||
    foreach my $value ( sort @{$values} ) {
 | 
			
		||||
      print "'", quotemeta($value), "' ";
 | 
			
		||||
    }
 | 
			
		||||
    print "\n";
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  # put the data back together again
 | 
			
		||||
  my $url = deparse_url( $host, $port, $dirs, $file, $params );
 | 
			
		||||
  print "\nDeparsed Data:\n\n";
 | 
			
		||||
  print "URL: http://$url\n";
 | 
			
		||||
 | 
			
		||||
=head1 Methods List
 | 
			
		||||
 | 
			
		||||
=head2 build_path ( dir ARRAYREF, [ page STRING ] ) : STRING
 | 
			
		||||
 | 
			
		||||
Takes an array ref of directory names and an optional filename and returns a
 | 
			
		||||
filepath. 
 | 
			
		||||
 | 
			
		||||
  use GT::URI::HTTP qw/ build_path /;
 | 
			
		||||
  print build_path( [ 'topdir', 'middir', 'bottomdir' ], 'file.html' );
 | 
			
		||||
 | 
			
		||||
This function must be imported.
 | 
			
		||||
 | 
			
		||||
=head2 build_parameters ( parameter HASHREF ) : STRING
 | 
			
		||||
 | 
			
		||||
Builds a CGI request string from list of keys and values. The function has the
 | 
			
		||||
ability to handle keys with more than one parameter, simply use an arrayref with
 | 
			
		||||
multiple values.
 | 
			
		||||
 | 
			
		||||
  use GT::URI::HTTP qw/ build_parameters /;
 | 
			
		||||
  my $params = {
 | 
			
		||||
                   'simplekey' => 'value'
 | 
			
		||||
                   'onekey' => [ 'one value' ],
 | 
			
		||||
                   'anotherkey' => [ 'another value', 'and yet anotherone!' ],
 | 
			
		||||
                };
 | 
			
		||||
  print build_parameters($params);
 | 
			
		||||
 | 
			
		||||
This function must be imported.
 | 
			
		||||
 | 
			
		||||
=head2 delete_parameter ( keys ARRAYREF/ARRAY ) 
 | 
			
		||||
 | 
			
		||||
When loading the object with parameters before a request, it is possible to
 | 
			
		||||
delete an entire set of keys and values. 
 | 
			
		||||
 | 
			
		||||
=head2 deparse_url ( host STRING, [ port STRING, [ dirs ARRAYREF, [ file STRING, [ params HASH ] ] ] ] ) : STRING
 | 
			
		||||
 | 
			
		||||
This builds an entire URL from basic parameters.
 | 
			
		||||
 | 
			
		||||
For an example of this function, see the example in "Support Methods".
 | 
			
		||||
 | 
			
		||||
This function must be imported.
 | 
			
		||||
 | 
			
		||||
=head2 do_iteration ( tics INTEGER ) : STRING
 | 
			
		||||
 | 
			
		||||
The basic iteration function. This function will return undef until the resource
 | 
			
		||||
is received which, upon receipt will return the resource data. 
 | 
			
		||||
 | 
			
		||||
The function can return an empty string, so it is important to checked
 | 
			
		||||
defined'ness. If the return is an empty string, check the ERROR_CODE in
 | 
			
		||||
resource_attrib to find out if the script simply can't connect to the host or
 | 
			
		||||
the resource is empty.
 | 
			
		||||
 | 
			
		||||
=head2 EOF () : BOOLEAN
 | 
			
		||||
 | 
			
		||||
Returns '1' or '0' depending if the object has stopped receiving/sending data to
 | 
			
		||||
the remote server.
 | 
			
		||||
 | 
			
		||||
=head2 fetch ( url STRING, [ parameters HASHREF ] )
 | 
			
		||||
 | 
			
		||||
Tells the server the URL to retreive the resource of. If CGI parameters are
 | 
			
		||||
required pass in a hash of keys and values.
 | 
			
		||||
 | 
			
		||||
=head2 GT::URI::HTTP->get ( url, [ options HASH/HASHREF ] ) : RESOURCE_DATA
 | 
			
		||||
 | 
			
		||||
Simplest resource aquision method. Give it the URL and any options and the
 | 
			
		||||
function will return after the resource has been downloaded.
 | 
			
		||||
 | 
			
		||||
=head2 gulp_read ( tic INTEGER ) : RESOURCE_DATA
 | 
			
		||||
 | 
			
		||||
This is just an alias to the function do_iteration. This method is used by
 | 
			
		||||
GT::URI in its mass resource aquisition runs. 
 | 
			
		||||
 | 
			
		||||
Unless you feel like being different, you shouldn't need to use this.
 | 
			
		||||
 | 
			
		||||
=head2 load_parameter ( params HASH/HASHREF ) : HASHREF
 | 
			
		||||
 | 
			
		||||
Takes a list of keys and values and loads the values into the list of CGI
 | 
			
		||||
parameters to be sent to the remote server.
 | 
			
		||||
 | 
			
		||||
=head2 method ( method STRING ) : STRING
 | 
			
		||||
 | 
			
		||||
Sets the acquisition method for the resource. Currently, GET/POST/HEAD are
 | 
			
		||||
supported. 
 | 
			
		||||
 | 
			
		||||
If no parameters are supplied the function simply returns the current
 | 
			
		||||
acquisition method.
 | 
			
		||||
 | 
			
		||||
=head2 parse_url ( url STRING  ) : host STRING, port INTEGER, dirs ARRAYREF,
 | 
			
		||||
file STRING, params HASHREF
 | 
			
		||||
 | 
			
		||||
Takes an URL and decomposes it into easily manipulated datastructures. The
 | 
			
		||||
output can be fed back into deparse_url to reconstruct an URL.
 | 
			
		||||
 | 
			
		||||
This function must be imported.
 | 
			
		||||
 | 
			
		||||
=head2 pending ( tics INTEGER ) : BOOLEAN
 | 
			
		||||
 | 
			
		||||
If there is data available to be downloaded, this function returns '1',
 | 
			
		||||
otherwise '0'. This is another function used by GT::URI in it's mass downloads
 | 
			
		||||
and unlikely to be of any use to anyone using this module directly. This
 | 
			
		||||
function exists because it is lighter than do_iteration which can be quite a
 | 
			
		||||
load as opposed to this if there were 100 racked downloads, all being polled
 | 
			
		||||
every tenth of a second!
 | 
			
		||||
 | 
			
		||||
=head2 resource_attrib ( [ key STRING ] ) STRING or HASHREF
 | 
			
		||||
 | 
			
		||||
If a key is requested, function will return the value associated with the
 | 
			
		||||
resource attribute. If not, the function will return a hashref keyed by server
 | 
			
		||||
parameter to its corresponding value.
 | 
			
		||||
 | 
			
		||||
All the server keys have been converted into lower-case. This prevents conflict
 | 
			
		||||
with two very important keys, ERROR_CODE, and ERROR_MESSAGE, which carry the
 | 
			
		||||
HTTP error code and message associated with the aquisition of this page.
 | 
			
		||||
 | 
			
		||||
=head1 EXAMPLES
 | 
			
		||||
 | 
			
		||||
=head2 HTTP get example
 | 
			
		||||
 | 
			
		||||
  #!/usr/bin/perl
 | 
			
		||||
 | 
			
		||||
  use GT::URI::HTTP;
 | 
			
		||||
 | 
			
		||||
  if ( not @ARGV ) {
 | 
			
		||||
    print qq!
 | 
			
		||||
    SYNOPSIS
 | 
			
		||||
 | 
			
		||||
      $0 url [-f/-h] [ cgi_parameter1=value1 cgi_parameter2=value2 ... ]
 | 
			
		||||
 | 
			
		||||
    basic HTTP requestor
 | 
			
		||||
 | 
			
		||||
    OPTIONS
 | 
			
		||||
 | 
			
		||||
    -f : full information; headers and resource. Usually only a dump of the resource is provided.
 | 
			
		||||
    -h : just the headers\n\n!;
 | 
			
		||||
    exit;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  # parse out the command line
 | 
			
		||||
  # first argument, URL
 | 
			
		||||
  $url  = shift @ARGV;
 | 
			
		||||
 | 
			
		||||
  # next arguments, parameters
 | 
			
		||||
  foreach my $item ( @ARGV ) {
 | 
			
		||||
 | 
			
		||||
  # ... check for special requests
 | 
			
		||||
    if ( $item =~ /^-([fd])$/ ) {
 | 
			
		||||
      $mode = $1;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  # ... is not a special request, but probably a parameter
 | 
			
		||||
    ( $key, $value ) = ( $item =~ /([^=]+)=(.*)/ );
 | 
			
		||||
    $key ||= $item;
 | 
			
		||||
    push @{$parameters->{$key}}, $value;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  # setup and send the request
 | 
			
		||||
  $http = new GT::URI::HTTP(
 | 
			
		||||
 | 
			
		||||
                            # if we're only looking to use the head
 | 
			
		||||
                            request_method => ( $mode eq 'h' ? 'HEAD' : 'GET' )
 | 
			
		||||
 | 
			
		||||
                            );
 | 
			
		||||
  $http->fetch( $url, $parameters );
 | 
			
		||||
 | 
			
		||||
  # get the resource
 | 
			
		||||
  while ( not defined ( $doc = $http->do_iteration(-1) ) ) {}
 | 
			
		||||
 | 
			
		||||
  # and print out the headers if wanted
 | 
			
		||||
  if ( $mode ) {
 | 
			
		||||
    $headers = $http->resource_attrib();
 | 
			
		||||
    foreach $key ( sort keys %{$headers || {}} ) {
 | 
			
		||||
      print "$key: $headers->{$key}\n";
 | 
			
		||||
    }
 | 
			
		||||
    print "\n";
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  # and output the resource...
 | 
			
		||||
  print $doc;
 | 
			
		||||
 | 
			
		||||
=head1 COPYRIGHT
 | 
			
		||||
 | 
			
		||||
Copyright (c) 2000 Gossamer Threads Inc.  All Rights Reserved.
 | 
			
		||||
http://www.gossamer-threads.com/
 | 
			
		||||
 | 
			
		||||
=head1 VERSION
 | 
			
		||||
 | 
			
		||||
Revision: $Id: HTTP.pm,v 1.30 2002/06/27 18:36:02 aki Exp $
 | 
			
		||||
 | 
			
		||||
=cut
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										601
									
								
								site/slowtwitch.com/cgi-bin/articles/admin/GT/URI/HTTPS.pm
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										601
									
								
								site/slowtwitch.com/cgi-bin/articles/admin/GT/URI/HTTPS.pm
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,601 @@
 | 
			
		||||
# ==================================================================
 | 
			
		||||
# Gossamer Threads Module Library
 | 
			
		||||
#
 | 
			
		||||
#   GT::URI::HTTP
 | 
			
		||||
#   Author  : Aki Mimoto (support@gossamer-threads.com)
 | 
			
		||||
#   CVS Info : 087,071,086,086,085      
 | 
			
		||||
#   $Id: HTTPS.pm,v 1.10 2004/08/23 20:07:44 jagerman Exp $
 | 
			
		||||
#
 | 
			
		||||
# Copyright (c) 2000 Gossamer Threads Inc.  All Rights Reserved.
 | 
			
		||||
# ==================================================================
 | 
			
		||||
#
 | 
			
		||||
# Description:
 | 
			
		||||
#   Gets HTTP data
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
package GT::URI::HTTPS;
 | 
			
		||||
# ===============================================================
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    use strict;
 | 
			
		||||
    use lib '..';
 | 
			
		||||
    use GT::Socket;
 | 
			
		||||
    use GT::Base;
 | 
			
		||||
    use GT::URI::HTTP qw/ build_path build_parameters /;
 | 
			
		||||
    use Net::SSLeay qw(die_now die_if_ssl_error) ;
 | 
			
		||||
    use Exporter ();
 | 
			
		||||
    use vars     qw/$ATTRIBS $EOL @ISA $DEBUG @EXPORT_OK $ERRORS/;
 | 
			
		||||
 | 
			
		||||
    $ERRORS = {
 | 
			
		||||
                'NOSSLCTX'      => 'Failed to create SSL_CTX: %s',
 | 
			
		||||
                'NOSSL'         => 'Failed to create SSL: %s',
 | 
			
		||||
                'CTXOPTIONS'    => 'Failed to set SSL CTX options: %s',
 | 
			
		||||
                'NOSSLCONNECT'  => 'Failed to connect ssl',
 | 
			
		||||
                'CANTCONN'      => 'Cannot connect to server'
 | 
			
		||||
                };
 | 
			
		||||
 | 
			
		||||
    @ISA    = ('GT::URI::HTTP', 'GT::Base', 'Exporter');
 | 
			
		||||
 | 
			
		||||
    $EOL    = "\015\012";
 | 
			
		||||
    $DEBUG  = 0;
 | 
			
		||||
 | 
			
		||||
    $ATTRIBS    = {
 | 
			
		||||
        'URL'               => '',
 | 
			
		||||
        'request_method'    => 'GET',
 | 
			
		||||
        'parameters'        => {},
 | 
			
		||||
        'agent_name'        => 'Mozilla/4.73 [en]',
 | 
			
		||||
        'agent_host'        => 'X11; I; Linux 2.2.15-4mdk i586',
 | 
			
		||||
        'headers'           => {},
 | 
			
		||||
        'cookies'           => {},
 | 
			
		||||
        'resource_attribs'  => {},
 | 
			
		||||
        'resource_data'     => undef,
 | 
			
		||||
        'max_down'          => 200000,
 | 
			
		||||
        'debug'             => 0,
 | 
			
		||||
        'ctx'               => undef,
 | 
			
		||||
        'ssl'               => undef,
 | 
			
		||||
        'cipher'            => undef,
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    @EXPORT_OK = qw/ parse_url deparse_url build_path build_parameters /;
 | 
			
		||||
 | 
			
		||||
    Net::SSLeay::load_error_strings();
 | 
			
		||||
    Net::SSLeay::SSLeay_add_ssl_algorithms();
 | 
			
		||||
    Net::SSLeay::randomize();
 | 
			
		||||
 | 
			
		||||
sub pending {
 | 
			
		||||
#-------------------------------------------------------------------------------
 | 
			
		||||
# return true if there is some data to be picked up
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $tics    = shift;
 | 
			
		||||
    return if ( $self->{resource_downloaded} );
 | 
			
		||||
    my $sock    = $self->{'sock'} or return 1;
 | 
			
		||||
    return $sock->pending();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub EOF {
 | 
			
		||||
#-------------------------------------------------------------------------------
 | 
			
		||||
# return true if the system has finished downloading
 | 
			
		||||
    my $self    = shift;
 | 
			
		||||
    my $sock    = $self->{'sock'};
 | 
			
		||||
    return ( $sock and $sock->fh() ? $self->{resource_downloaded} : 1 );
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub do_iteration {
 | 
			
		||||
#-------------------------------------------------------------------------------
 | 
			
		||||
# useful in a loop, this is a generally non blocking way to get a page
 | 
			
		||||
# it will return nulls until it gets the page (and returns the page)
 | 
			
		||||
#
 | 
			
		||||
    my ( $self, $fh, $sock, $pause );
 | 
			
		||||
    $self   = shift;
 | 
			
		||||
    $pause  = shift;
 | 
			
		||||
    $sock   = $self->{'sock'};
 | 
			
		||||
 | 
			
		||||
# if we haven't done the request yet
 | 
			
		||||
    if ( not( $self->{'resource_downloaded'} or $sock ) and $self->{URL} ) {
 | 
			
		||||
        $self->fetch() or return ''; 
 | 
			
		||||
        $sock   = $self->{'sock'};
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# if we can't connect
 | 
			
		||||
    if ( $self->{'flag_err'} ) {
 | 
			
		||||
        return '';
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    $sock->fh() or return;
 | 
			
		||||
# only if the resource hasn't been downloaded yet..,
 | 
			
		||||
    if ( !$self->{'resource_downloaded'} ) {
 | 
			
		||||
# if there's data pending, download it!
 | 
			
		||||
        if  ( $sock->pending( $pause ) ) {
 | 
			
		||||
# get as much data as we can
 | 
			
		||||
            my $ssl = $self->{'ssl'};
 | 
			
		||||
            my $buf = Net::SSLeay::read($ssl);
 | 
			
		||||
            if ( $buf ) {
 | 
			
		||||
                $self->{'resource_data'} .= $buf;
 | 
			
		||||
# if we need to parse the server headers
 | 
			
		||||
                $self->_parse_server_headers();
 | 
			
		||||
            }
 | 
			
		||||
# we've run out of data, let's stop
 | 
			
		||||
            else {
 | 
			
		||||
                $self->{resource_downloaded} = 1;
 | 
			
		||||
                return $self->{'resource_data'};
 | 
			
		||||
                Net::SSLeay::free ($ssl);
 | 
			
		||||
                Net::SSLeay::CTX_free ($self->{'ctx'});
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
    else {
 | 
			
		||||
        return $self->{'resource_data'};
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
sub fetch {
 | 
			
		||||
#-------------------------------------------------------------------------------
 | 
			
		||||
# fetches a page and handles CGI if requested
 | 
			
		||||
#
 | 
			
		||||
    my ( $self, $url, $parameters, $sock, $request_str, $fh );
 | 
			
		||||
 | 
			
		||||
    $self       = shift;
 | 
			
		||||
    $url        = shift || $self->{'URL'} or return undef;
 | 
			
		||||
    $parameters = shift || $self->{'parameters'};
 | 
			
		||||
 | 
			
		||||
# save the headers
 | 
			
		||||
    $self->{'URL'}  = $url;
 | 
			
		||||
    $self->{'parameters'}   = $parameters;
 | 
			
		||||
 | 
			
		||||
    my ($host, $port, $dirs, $file, $params) = parse_url($url);
 | 
			
		||||
 | 
			
		||||
# see if we can connect to the host
 | 
			
		||||
    $sock = GT::Socket->open({
 | 
			
		||||
        'host'      => $host,
 | 
			
		||||
        'port'      => $port,
 | 
			
		||||
        '_debug'    => $self->{_debug},
 | 
			
		||||
        'debug'     => $self->{debug},
 | 
			
		||||
        'max_down'  => $self->{max_down}
 | 
			
		||||
    });
 | 
			
		||||
 | 
			
		||||
# submit the request to the host
 | 
			
		||||
    if ( $sock ) {
 | 
			
		||||
        $fh = $sock->fh();
 | 
			
		||||
 | 
			
		||||
# The network connection is now open, lets fire up SSL
 | 
			
		||||
        my $ctx = $self->{'ctx'} = Net::SSLeay::CTX_new()        or return $self->error("NOSSLCTX", 'WARN', $!);
 | 
			
		||||
        Net::SSLeay::CTX_set_options($ctx, &Net::SSLeay::OP_ALL) and return $self->error("CTXOPTIONS", 'WARN');
 | 
			
		||||
        my $ssl = $self->{'ssl'} = Net::SSLeay::new($ctx)        or return $self->error("NOSSL", 'WARN', $!);
 | 
			
		||||
        Net::SSLeay::set_fd($ssl, fileno($fh));
 | 
			
		||||
        my $res = $self->{'res'} = Net::SSLeay::connect($ssl);#      and return $self->error( "NOSSLCONNECT", 'WARN' );
 | 
			
		||||
 | 
			
		||||
        $self->{'cipher'} = Net::SSLeay::get_cipher($ssl);
 | 
			
		||||
 | 
			
		||||
        $self->debug( "Connected to $url" ) if ($self->{_debug});
 | 
			
		||||
        $request_str    = $self->_create_request( $url, $parameters, $host, $port, $dirs, $file, $params );
 | 
			
		||||
        $self->debug( "Sent request:\n$request_str" ) if ($self->{_debug});
 | 
			
		||||
        $res = Net::SSLeay::write($ssl, $request_str);
 | 
			
		||||
    }
 | 
			
		||||
    else {
 | 
			
		||||
        $self->debug( "Couldn't connect to $url" ) if ($self->{_debug});
 | 
			
		||||
        $self->{resource_attribs} = {
 | 
			
		||||
            ERROR_CODE      => 'CANTCONN',
 | 
			
		||||
            ERROR_MESSAGE   => 'Cannot connect to server'
 | 
			
		||||
        };
 | 
			
		||||
        $self->{resource_downloaded} = 1;
 | 
			
		||||
        $self->{flag_err}            = 1;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# store the socket
 | 
			
		||||
    return($self->{'sock'} = $sock);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub parse_url {
 | 
			
		||||
#-------------------------------------------------------------------------------
 | 
			
		||||
# parses the url and converts it into host
 | 
			
		||||
# assume HTTP/1.0 format
 | 
			
		||||
#
 | 
			
		||||
    my ( $url, $host, $port, $path, $file, $page, @dirs, $param, %params, $item );
 | 
			
		||||
    $url = shift;
 | 
			
		||||
 | 
			
		||||
# first, do the initial parse
 | 
			
		||||
    $url   =~ s/^[a-zA-Z]+:\/\///i;
 | 
			
		||||
    ($host, $port, $path, $param) = $url =~ /([\w.\-]*):?(\d*)\/?([^?]*)\??(.*)/;
 | 
			
		||||
    $port ||= 443;
 | 
			
		||||
 | 
			
		||||
# now handle the paths
 | 
			
		||||
    $path =~ s/^\s*(.*?)\s*$/$1/;
 | 
			
		||||
 | 
			
		||||
# the paths are not simply directories
 | 
			
		||||
    if ( $path !~ /\/$/ ) {
 | 
			
		||||
        $path =~ s/([^\/]*)$//;
 | 
			
		||||
        $file = $1;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# handle the directories
 | 
			
		||||
    @dirs = grep $_, split '/', $path;
 | 
			
		||||
 | 
			
		||||
# and finally parse out the parameters if there are any
 | 
			
		||||
    foreach $item ( split /&/, $param ) {
 | 
			
		||||
        if ( $item =~ /^(.*?)=(.*)$/ ) {
 | 
			
		||||
            push @{$params{$1}}, $2;
 | 
			
		||||
        }
 | 
			
		||||
        else {
 | 
			
		||||
            push @{$params{$item}}, undef;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
# now return the collection
 | 
			
		||||
    return( $host, $port, \@dirs, $file, \%params );
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub deparse_url {
 | 
			
		||||
#-------------------------------------------------------------------------------
 | 
			
		||||
# takes a number of parameters and builds the request url
 | 
			
		||||
    my ( $url, $host, $port, $path, $file, $page, $dirs, $param, $params, $item );
 | 
			
		||||
 | 
			
		||||
    ( $host, $port, $dirs, $file, $params ) = @_;
 | 
			
		||||
 | 
			
		||||
    $url = $host;
 | 
			
		||||
 | 
			
		||||
# build the host portion
 | 
			
		||||
    $url .= ":$port" unless ($port == 443);
 | 
			
		||||
 | 
			
		||||
# now build the page access
 | 
			
		||||
    $url    .= "/" . build_path( $dirs, $file );
 | 
			
		||||
 | 
			
		||||
# and finally handle the parameters if any
 | 
			
		||||
    if ( $params and keys %{$params} ) {
 | 
			
		||||
        $url    .= "?" . build_parameters( $params );
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return $url;
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
sub get {
 | 
			
		||||
#-------------------------------------------------------------------------------
 | 
			
		||||
# simple blocking method to get some data
 | 
			
		||||
    my ( $uri, $options, $HTTPS, $doc );
 | 
			
		||||
 | 
			
		||||
    shift; # get rid of the first parameter
 | 
			
		||||
    $uri        = shift or return undef;
 | 
			
		||||
    $options    = ( @_ and ref $_[0] ) ? shift : { @_ };
 | 
			
		||||
    $HTTPS       = new GT::URI::HTTPS($options);
 | 
			
		||||
    $HTTPS->fetch($uri);
 | 
			
		||||
 | 
			
		||||
    while ( not defined( $doc = $HTTPS->do_iteration(0) ) ) {};
 | 
			
		||||
 | 
			
		||||
    return $doc;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
1;
 | 
			
		||||
 | 
			
		||||
__END__
 | 
			
		||||
 | 
			
		||||
=head1 NAME
 | 
			
		||||
 | 
			
		||||
GT::URI::HTTPS - HTTPS request broker. Can be used stand-alone or through GT::URI
 | 
			
		||||
 | 
			
		||||
=head1 SYNOPSIS
 | 
			
		||||
 | 
			
		||||
  use GT::URI::HTTPS;
 | 
			
		||||
 | 
			
		||||
  print GT::URI::HTTPS->get( "http://www.gossamer-threads.com" );
 | 
			
		||||
 | 
			
		||||
=head1 DESCRIPTION
 | 
			
		||||
 | 
			
		||||
GT::URI::HTTPS makes requests and retrieves resources from http servers (not limited to text). 
 | 
			
		||||
 | 
			
		||||
=head1 Method List
 | 
			
		||||
 | 
			
		||||
Socket Handling
 | 
			
		||||
 | 
			
		||||
  sub pending()            Returns true if data awaiting
 | 
			
		||||
  sub EOF()                Returns open/closed status of socket
 | 
			
		||||
  sub gulp_read()          Alias to do_iteration
 | 
			
		||||
  sub do_iteration()       Basic looping function that downloads resources in the background
 | 
			
		||||
 | 
			
		||||
Acquisition
 | 
			
		||||
 | 
			
		||||
  sub fetch()              Tell the object which URL to acquire
 | 
			
		||||
  sub method()             The method of acquisition
 | 
			
		||||
  sub load_parameter()     Add a item for CGI parameters
 | 
			
		||||
  sub delete_parameter()   Delete a CGI parameter
 | 
			
		||||
  sub resource_attrib()    Headers related to resource and server
 | 
			
		||||
  sub get()                Simple resource aquisition function
 | 
			
		||||
 | 
			
		||||
Support Methods (must be imported)
 | 
			
		||||
 | 
			
		||||
  sub parse_url()          Decomposes a URL into constituent parts
 | 
			
		||||
  sub deparse_url()        Takes those parts and builds an URL  
 | 
			
		||||
  sub build_path()         Takes a list of directories and builds a path
 | 
			
		||||
  sub build_parameters()   Takes a hash of parameter->values and builds a CGI request string
 | 
			
		||||
 | 
			
		||||
=head1 Basics
 | 
			
		||||
 | 
			
		||||
=head2 Getting a resource, the simple way
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Just want a single item? Call GT::URI::HTTPS->get and all the magic will be done for you.
 | 
			
		||||
 | 
			
		||||
  use GT::URI::HTTPS;
 | 
			
		||||
  my $buf = GT::URI::HTTPS->get( "http://www.gossamer-threads.com/" );
 | 
			
		||||
 | 
			
		||||
Get based requests are permissable as well:
 | 
			
		||||
 | 
			
		||||
  use GT::URI::HTTPS;
 | 
			
		||||
  my $buf = GT::URI::HTTPS->get( "http://search.yahoo.com/bin/search?p=gossamer+threads" );
 | 
			
		||||
 | 
			
		||||
If extra options need to be set, simply append the options to the parameter list, like follows. 
 | 
			
		||||
 | 
			
		||||
  use GT::URI::HTTPS;
 | 
			
		||||
  my $buf = GT::URI::HTTPS->get( "http://search.yahoo.com/bin/search?p=gossamer+threads", { request_method => 'POST'  } );
 | 
			
		||||
 | 
			
		||||
=head2 When just the document is not enough
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
If a new GT::URI::HTTPS object is instantiated, much more control is available, including facilities for non-blocking downloading of pages.
 | 
			
		||||
 | 
			
		||||
To create a GT::URI::HTTPS object, call new with all the options required:
 | 
			
		||||
 | 
			
		||||
  use GT::URI::HTTPS;
 | 
			
		||||
 | 
			
		||||
  my $http = new GT::URI::HTTPS(
 | 
			
		||||
 | 
			
		||||
                    # URL to acquire (optional)
 | 
			
		||||
                        'URL'               => '',
 | 
			
		||||
 | 
			
		||||
                    # Can also be set to POST/GET/HEAD (optional)
 | 
			
		||||
                        'request_method'    => 'GET',
 | 
			
		||||
 | 
			
		||||
                    # a hash of keys pointing to an arrayref of values to be sent to the server
 | 
			
		||||
                    # {
 | 
			
		||||
                    #    'key' => [ 'value1', 'value2'... ],
 | 
			
		||||
                    # }
 | 
			
		||||
                    # (optional)
 | 
			
		||||
                        'parameters'        => {},
 | 
			
		||||
 | 
			
		||||
                    # Name portion of the User-Agent: string the server acquires (optional)
 | 
			
		||||
                        'agent_name'        => 'Mozilla/4.73 [en]',
 | 
			
		||||
 | 
			
		||||
                    # Host-from portion of the User-Agent: string the server acquires (optional)
 | 
			
		||||
                        'agent_host'        => 'X11; I; Linux 2.2.15-4mdk i586',
 | 
			
		||||
 | 
			
		||||
                    # To prevent downloading of 80Tb files, but if you still wanted to, set this to 0 (optional)
 | 
			
		||||
                        'max_down'          => 200000
 | 
			
		||||
 | 
			
		||||
                    );
 | 
			
		||||
 | 
			
		||||
If URL has been specified in the options, for interactions with a CGI, you can set extra parameters with $http->load_parameter(). Finally, loop on $http->do_iteration() until the value is defined. To replicate the "simple get" example:
 | 
			
		||||
 | 
			
		||||
  use GT::URI::HTTPS;
 | 
			
		||||
  $|++;
 | 
			
		||||
 | 
			
		||||
  my $http = new GT::URI::HTTPS(
 | 
			
		||||
 | 
			
		||||
                              URL        => 'http://search.yahoo.com/bin/search',
 | 
			
		||||
 | 
			
		||||
                              # can also use the following:
 | 
			
		||||
                              parameters => {
 | 
			
		||||
                                              'p' => [ 'gossamer threads' ]
 | 
			
		||||
                                            }
 | 
			
		||||
 | 
			
		||||
                              );
 | 
			
		||||
 | 
			
		||||
  my $doc;
 | 
			
		||||
  while ( not defined( $doc = $http->do_iteration() ) ) {
 | 
			
		||||
          # do something here while waiting for the resource to arrive 
 | 
			
		||||
          print "." 
 | 
			
		||||
          }
 | 
			
		||||
 | 
			
		||||
  print $doc, "\n\n";
 | 
			
		||||
 | 
			
		||||
Beyond the resource, the http server often supplies extra information in a header. To access this information, use $http->resource_attrib(). 
 | 
			
		||||
 | 
			
		||||
Appending this code to the previous example, a list of all the associated server headers can be seen:
 | 
			
		||||
 | 
			
		||||
  my $attribs = $http->resource_attrib();
 | 
			
		||||
  foreach my $key ( sort keys %{$attribs} ) {
 | 
			
		||||
    print "$key => $attribs->{$key}\n";
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
=head2 Support Methods
 | 
			
		||||
 | 
			
		||||
In addition to the basic fetching abilities, since the module must parse HTTPS URLs, the methods used to do so have been made public. 
 | 
			
		||||
 | 
			
		||||
These methods decompose URLs into datastructures that make URLs easily studied or modified and then reconstructed.
 | 
			
		||||
 | 
			
		||||
However, these routines have not been polished for useability so beware! The following is a very basic example:
 | 
			
		||||
 | 
			
		||||
  use GT::URI::HTTPS qw/ parse_url deparse_url build_path build_parameters /;
 | 
			
		||||
 | 
			
		||||
  # fragment the URL
 | 
			
		||||
  my ( $host, $port, $dirs, $file, $params ) = parse_url( 'http://www.gossamer-threads.com/perl/forum/showflat.pl?Cat=&Board=GosDisc&Number=113355&page=0&view=' );
 | 
			
		||||
 | 
			
		||||
  print "Parsed Data:\n\n";
 | 
			
		||||
  print "Host: $host\n";
 | 
			
		||||
  print "Port: $port\n";
 | 
			
		||||
  print "Dirs:\n";
 | 
			
		||||
  foreach my $dir ( @{$dirs} ) {
 | 
			
		||||
      print "  $dir/\n";
 | 
			
		||||
  }
 | 
			
		||||
  print "Resource Filename: $file\n";
 | 
			
		||||
  print "Params:\n";
 | 
			
		||||
  foreach my $key ( sort keys %{$params} ) {
 | 
			
		||||
    print "  $key: ";
 | 
			
		||||
    my $values    = ( $params->{$key} || {} );
 | 
			
		||||
    foreach my $value ( sort @{$values} ) {
 | 
			
		||||
      print "'", quotemeta($value), "' ";
 | 
			
		||||
    }
 | 
			
		||||
    print "\n";
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  # put the data back together again
 | 
			
		||||
  my $url = deparse_url( $host, $port, $dirs, $file, $params );
 | 
			
		||||
  print "\nDeparsed Data:\n\n";
 | 
			
		||||
  print "URL: http://$url\n";
 | 
			
		||||
 | 
			
		||||
=head1 Methods List
 | 
			
		||||
 | 
			
		||||
=head2 build_path ( dir ARRAYREF, [ page STRING ] ) : STRING
 | 
			
		||||
 | 
			
		||||
Takes an array ref of directory names and an optional filename and returns a filepath. 
 | 
			
		||||
 | 
			
		||||
  use GT::URI::HTTPS qw/ build_path /;
 | 
			
		||||
  print build_path( [ 'topdir', 'middir', 'bottomdir' ], 'file.html' );
 | 
			
		||||
 | 
			
		||||
This function must be imported.
 | 
			
		||||
 | 
			
		||||
=head2 build_parameters ( parameter HASHREF ) : STRING
 | 
			
		||||
 | 
			
		||||
Builds a CGI request string from list of keys and values. The function has the ability to handle keys with more than one parameter, simply use an arrayref with multiple values.
 | 
			
		||||
 | 
			
		||||
  use GT::URI::HTTPS qw/ build_parameters /;
 | 
			
		||||
  my $params = {
 | 
			
		||||
                   'simplekey' => 'value'
 | 
			
		||||
                   'onekey' => [ 'one value' ],
 | 
			
		||||
                   'anotherkey' => [ 'another value', 'and yet anotherone!' ],
 | 
			
		||||
                };
 | 
			
		||||
  print build_parameters($params);
 | 
			
		||||
 | 
			
		||||
This function must be imported.
 | 
			
		||||
 | 
			
		||||
=head2 delete_parameter ( keys ARRAYREF/ARRAY ) 
 | 
			
		||||
 | 
			
		||||
When loading the object with parameters before a request, it is possible to delete an entire set of keys and values. 
 | 
			
		||||
 | 
			
		||||
=head2 deparse_url ( host STRING, [ port STRING, [ dirs ARRAYREF, [ file STRING, [ params HASH ] ] ] ] ) : STRING
 | 
			
		||||
 | 
			
		||||
This builds an entire URL from basic parameters.
 | 
			
		||||
 | 
			
		||||
For an example of this function, see the example in "Support Methods".
 | 
			
		||||
 | 
			
		||||
This function must be imported.
 | 
			
		||||
 | 
			
		||||
=head2 do_iteration ( tics INTEGER ) : STRING
 | 
			
		||||
 | 
			
		||||
The basic iteration function. This function will return undef until the resource is received which, upon receipt will return the resource data. 
 | 
			
		||||
 | 
			
		||||
The function can return an empty string, so it is important to checked defined'ness. If the return is an empty string, check the ERROR_CODE in resource_attrib to find out if the script simply can't connect to the host or the resource is empty.
 | 
			
		||||
 | 
			
		||||
=head2 EOF () : BOOLEAN
 | 
			
		||||
 | 
			
		||||
Returns '1' or '0' depending if the object has stopped receiving/sending data to the remote server.
 | 
			
		||||
 | 
			
		||||
=head2 fetch ( url STRING, [ parameters HASHREF ] )
 | 
			
		||||
 | 
			
		||||
Tells the server the URL to retreive the resource of. If CGI parameters are required pass in a hash of keys and values.
 | 
			
		||||
 | 
			
		||||
=head2 GT::URI::HTTPS->get ( url, [ options HASH/HASHREF ] ) : RESOURCE_DATA
 | 
			
		||||
 | 
			
		||||
Simplest resource aquision method. Give it the URL and any options and the function will return after the resource has been downloaded.
 | 
			
		||||
 | 
			
		||||
=head2 gulp_read ( tic INTEGER ) : RESOURCE_DATA
 | 
			
		||||
 | 
			
		||||
This is just an alias to the function do_iteration. This method is used by GT::URI in its mass resource aquisition runs. 
 | 
			
		||||
 | 
			
		||||
Unless you feel like being different, you shouldn't need to use this.
 | 
			
		||||
 | 
			
		||||
=head2 load_parameter ( params HASH/HASHREF ) : HASHREF
 | 
			
		||||
 | 
			
		||||
Takes a list of keys and values and loads the values into the list of CGI parameters to be sent to the remote server.
 | 
			
		||||
 | 
			
		||||
=head2 method ( method STRING ) : STRING
 | 
			
		||||
 | 
			
		||||
Sets the acquisition method for the resource. Currently, GET/POST/HEAD are supported. 
 | 
			
		||||
 | 
			
		||||
If no parameters are supplied the function simply returns the current acquisition method.
 | 
			
		||||
 | 
			
		||||
=head2 parse_url ( url STRING  ) : host STRING, port INTEGER, dirs ARRAYREF, file STRING, params HASHREF
 | 
			
		||||
 | 
			
		||||
Takes an URL and decomposes it into easily manipulated datastructures. The output can be fed back into deparse_url to reconstruct an URL.
 | 
			
		||||
 | 
			
		||||
This function must be imported.
 | 
			
		||||
 | 
			
		||||
=head2 pending ( tics INTEGER ) : BOOLEAN
 | 
			
		||||
 | 
			
		||||
If there is data available to be downloaded, this function returns '1', otherwise '0'. This is another function used by GT::URI in it's mass downloads and unlikely to be of any use to anyone using this module directly. This function exists because it is lighter than do_iteration which can be quite a load as opposed to this if there were 100 racked downloads, all being polled every tenth of a second!
 | 
			
		||||
 | 
			
		||||
=head2 resource_attrib ( [ key STRING ] ) STRING or HASHREF
 | 
			
		||||
 | 
			
		||||
If a key is requested, function will return the value associated with the resource attribute. If not, the function will return a hashref keyed by server parameter to its corresponding value.
 | 
			
		||||
 | 
			
		||||
All the server keys have been converted into lower-case. This prevents conflict with two very important keys, ERROR_CODE, and ERROR_MESSAGE, which carry the HTTPS error code and message associated with the aquisition of this page.
 | 
			
		||||
 | 
			
		||||
=head1 EXAMPLES
 | 
			
		||||
 | 
			
		||||
=head2 HTTPS get example
 | 
			
		||||
 | 
			
		||||
  #!/usr/bin/perl
 | 
			
		||||
 | 
			
		||||
  use GT::URI::HTTPS;
 | 
			
		||||
 | 
			
		||||
  if ( not @ARGV ) {
 | 
			
		||||
    print qq!
 | 
			
		||||
    SYNOPSIS
 | 
			
		||||
 | 
			
		||||
      $0 url [-f/-h] [ cgi_parameter1=value1 cgi_parameter2=value2 ... ]
 | 
			
		||||
 | 
			
		||||
    basic HTTPS requestor
 | 
			
		||||
 | 
			
		||||
    OPTIONS
 | 
			
		||||
 | 
			
		||||
    -f : full information; headers and resource. Usually only a dump of the resource is provided.
 | 
			
		||||
    -h : just the headers\n\n!;
 | 
			
		||||
    exit;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  # parse out the command line
 | 
			
		||||
  # first argument, URL
 | 
			
		||||
  $url  = shift @ARGV;
 | 
			
		||||
 | 
			
		||||
  # next arguments, parameters
 | 
			
		||||
  foreach my $item ( @ARGV ) {
 | 
			
		||||
 | 
			
		||||
  # ... check for special requests
 | 
			
		||||
    if ( $item =~ /^-([fd])$/ ) {
 | 
			
		||||
      $mode = $1;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  # ... is not a special request, but probably a parameter
 | 
			
		||||
    ( $key, $value ) = ( $item =~ /([^=]+)=(.*)/ );
 | 
			
		||||
    $key ||= $item;
 | 
			
		||||
    push @{$parameters->{$key}}, $value;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  # setup and send the request
 | 
			
		||||
  $http = new GT::URI::HTTPS(
 | 
			
		||||
 | 
			
		||||
                            # if we're only looking to use the head
 | 
			
		||||
                            request_method => ( $mode eq 'h' ? 'HEAD' : 'GET' )
 | 
			
		||||
 | 
			
		||||
                            );
 | 
			
		||||
  $http->fetch( $url, $parameters );
 | 
			
		||||
 | 
			
		||||
  # get the resource
 | 
			
		||||
  while ( not defined ( $doc = $http->do_iteration(-1) ) ) {}
 | 
			
		||||
 | 
			
		||||
  # and print out the headers if wanted
 | 
			
		||||
  if ( $mode ) {
 | 
			
		||||
    $headers = $http->resource_attrib();
 | 
			
		||||
    foreach $key ( sort keys %{$headers || {}} ) {
 | 
			
		||||
      print "$key: $headers->{$key}\n";
 | 
			
		||||
    }
 | 
			
		||||
    print "\n";
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  # and output the resource...
 | 
			
		||||
  print $doc;
 | 
			
		||||
 | 
			
		||||
=head1 COPYRIGHT
 | 
			
		||||
 | 
			
		||||
Copyright (c) 2000 Gossamer Threads Inc.  All Rights Reserved.
 | 
			
		||||
http://www.gossamer-threads.com/
 | 
			
		||||
 | 
			
		||||
=head1 VERSION
 | 
			
		||||
 | 
			
		||||
Revision: $Id: HTTPS.pm,v 1.10 2004/08/23 20:07:44 jagerman Exp $
 | 
			
		||||
 | 
			
		||||
=cut
 | 
			
		||||
 | 
			
		||||
		Reference in New Issue
	
	Block a user