discourse-legacysite-perl/site/slowtwitch.com/cgi-bin/articles/GT/WWW.pm
2024-06-17 21:49:12 +10:00

1118 lines
34 KiB
Perl

# ====================================================================
# Gossamer Threads Module Library - http://gossamer-threads.com/
#
# GT::WWW
# Author: Jason Rhinelander
# CVS Info : 087,071,086,086,085
# $Id: WWW.pm,v 1.30 2009/06/09 00:57:25 brewt Exp $
#
# Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
# ====================================================================
#
# Description:
# Implements retrieving and posting to WWW sites, using HTTP or HTTPS. HTTPS
# support requires Net::SSLeay.
#
# GT::URI should be considered deprecated in favour of this module.
#
package GT::WWW;
use strict;
use Carp;
use Symbol;
use Net::servent;
use vars qw/%PUBLIC $ERRORS %PROTOCOL $HOST_RE $VERSION @EXPORT_OK @ISA/;
require Exporter;
$VERSION = sprintf "%d.%03d", q$Revision: 1.30 $ =~ /(\d+)\.(\d+)/;
@EXPORT_OK = qw/get post head getprint postprint/;
@ISA = 'Exporter';
%PUBLIC = ( # Public methods, passable via new(). The values are the equivelant methods.
server => 'server',
port => 'port',
path => 'path',
username => 'username',
password => 'password',
header => 'header',
parameters => 'parameters',
query_string => 'query_string',
debug => 'debug_level',
debug_level => 'debug_level',
agent => 'agent',
chunk => 'chunk',
chunk_size => 'chunk_size'
);
%PROTOCOL = (
http => __PACKAGE__ . "::http",
https => __PACKAGE__ . "::https",
# ftp => __PACKAGE__ . "::ftp"
);
# This must capture two and only two subpatterns - host and port.
# The following _REQUIRES_ /x regex modifier! I'd use (?x), but it doesn't
# work the same way in 5.004 that it does now.
$HOST_RE = '(
(?: \w(?:[-\w]*\w)? \. )* # "foo.foo." of "foo.foo.com"
\w(?:[-\w]*\w)?\.? # and the "com", or possibly just a host like "penguin"
# Domains can also end with a single .
|
(?:(?:[01]?\d?\d|2[0-4]\d|25[0-5])\.){3} # Match an IP, but only 0-255 in
(?:[01]?\d?\d|2[0-4]\d|25[0-5]) # each field - nothing > 255
)
(?::(\d+)?)? # port - The RFC says: http://foo:/path is valid
';
#use Socket; # GT::Socket's read/write methods aren't sufficient and are too
# buggy. The only way to make use of it would be to use it to connect, then
# directly access the filehandle, so instead of loading all the useless code,
# GT::WWW modules should use GT::Socket::Client instead.
sub new {
my $class = shift;
my $self = {};
bless $self, ref($class) || $class;
if (@_) {
if (ref $_[0] eq 'HASH' or @_ % 2 == 0) {
my $input = ref $_[0] eq 'HASH' ? shift : {@_};
$self->debug_level(delete $input->{debug}) if exists $input->{debug};
# Handle the protocol before the other options, because some might be available to all protocols
if (exists $input->{protocol}) {
my $proto = delete $input->{protocol};
$self->protocol($proto);
}
for my $key (keys %$input) {
croak "Invalid parameter '$key'"
unless substr($key, 0, 1) ne '_'
and $self->can($key);
$self->$key($input->{$key});
}
}
elsif (@_ == 1 and not ref $_[0]) {
$self->url(@_);
}
else {
croak "Unknown arguments: @_";
}
}
$self->{debug} ||= 0; # So that: 'if $self->{debug} >= 2' won't generate warnings
$self;
}
# Figures out the arguments, and returns a properly set up GT::WWW subclass.
# Used by the quick get* and post* methods/functions below.
sub _quick_args {
(my $called_from = (caller(1))[3]) =~ s/.*:://;
my ($class, $url);
if (@_ == 0 or @_ == 1 and UNIVERSAL::isa($_[0], __PACKAGE__)) {
# Subclasses override get(), so this is either a broken subclass or
# calling get() on a GT::WWW object.
croak "Usage: $called_from(URL), GT::WWW->$called_from(URL), or set a URL first";
# As soon as a URL is set on the object, the object is reblessed.
}
elsif (@_ == 1) {
# function: (GT::WWW::)get(URL)
($class, $url) = (__PACKAGE__, shift);
}
elsif (@_ == 2) {
if (ref $_[0] eq __PACKAGE__) {
croak "Usage: GT::WWW->$called_from(URL) or $called_from(URL); \$gtwww->$called_from(URL) is not permitted";
}
elsif (ref $_[0] or $_[0] ne __PACKAGE__) {
# $other->get(URL) or Other->get(URL) - if it gets here, $other's
# class is broken
croak "Usage: GT::WWW->$called_from(URL) or $called_from(URL) (Subclass " .
(ref $_[0] or $_[0]) . " is probably broken: ->$called_from() method came to GT::WWW)";
}
else {
($class, $url) = (@_);
}
}
else {
croak "Usage: GT::WWW->$called_from(URL), \$gtwww->$called_from(URL), or $called_from(URL)";
}
$url or croak 'Error: No URL specified';
$class->new({ fatal_errors => 1, url => $url });
}
# This should be subclassed by all subclasses. The function below should be
# called only as a function (perl -MGT::WWW=get -e 'print get("...url...")')
sub get {
my $www = &_quick_args;
$www->get;
}
# Basically, this accomplishes the same as: print GT::WWW->get(URL), except
# that it also sets up chunked downloading so that it can efficiently be used
# with large pages.
sub getprint {
my $www = &_quick_args;
$www->chunk(sub { print ${$_[0]} });
$www->chunk_size(4096);
$www->get();
}
sub post {
my $www = &_quick_args;
$www->post();
}
sub postprint {
my $www = &_quick_args;
$www->chunk(sub { print ${$_[0]} });
$www->chunk_size(4096);
$www->post();
}
sub head {
my $www = &_quick_args;
$www->head;
}
sub cancel { } # No ops by default
sub cancelled { }
sub parse_url {
my ($self, $url) = splice @_, 0, 2;
my $strict = shift if not ref $self;
my $valid_user = '(?:[\w\$.+!*\'()|,;?&=-]|%[0-9A-Fa-f]{2})+';
my $hsegment = '(?:[\w\$.+!*\'()|,;~&=:@-]|%[0-9A-Fa-f]{2})+';
my $hrelaxed = '(?:[^\x00-\x08\x0a-\x1f#%/?]|%[0-9A-Fa-f]{2})+';
my $qrelaxed = '(?:[^\x00-\x08\x0a-\x1f#%]|%[0-9A-Fa-f]{2})+';
my $proto_re = join "|", map quotemeta, keys %PROTOCOL;
# The following regex is designed primarily for http and https URL's, and
# thus allows things that don't necessarily make sense for other protocols.
# i.e.: ftp://jagerman:password@my.server/foo/bar.txt?asdf=zxcv#anchor
# ^^^^^^^^^^^^^^^^^
# This _could_ allow for protocol extensions, for example 'type=ascii' for
# FTP requests.
my ($protocol, $username, $password, $server, $port, $path, $query_string) =
(ref $self ? $self->{strict} : $strict)
? $url =~ m(
^
($proto_re)
://
(?:
($valid_user) # username
(?:
:($valid_user)? # password
)?
@
)?
$HOST_RE
(
/+(?:$hsegment(?:/+(?:$hsegment)?)*)? # Match the path (/foo/bar.cgi)
)?
(?:
\?($hsegment)?
)?
(?:
\#.* # Allow a possible anchor - but we don't care about it
)?
$
)iox
: $url =~ m(
^
($proto_re)
://
(?:
($valid_user) # username
(?:
:($valid_user)? # password
)?
@
)?
$HOST_RE
(
/+(?:$hrelaxed(?:/+(?:$hrelaxed)?)*)? # Match the path (/foo/bar.cgi)
)?
(?:
\?($qrelaxed)?
)?
(?:
\#.* # Allow a possible anchor - but we don't care about it
)?
$
)iox;
return ($protocol, $username, $password, $server, $port, $path, $query_string);
}
sub url {
my $self = shift;
if (!@_) {
my $proto = $self->protocol;
my $host = $self->host;
$proto and $host or return undef;
my $url = "$proto://";
my $username = $self->username;
my $password = $self->password;
if ($username) {
$url .= $username;
$url .= ":$password" if $password;
$url .= "@";
}
$url .= "$host";
my $port = $self->port;
$url .= ":$port" unless $port == $self->default_port;
my $path = $self->path;
$url .= $path if $path;
my $query = $self->query_string;
$url .= "?$query" if $query;
return $url;
}
my $url = shift;
my ($protocol, $username, $password, $server, $port, $path, $query_string) =
$self->parse_url($url);
unless ($protocol) {
croak "Invalid URL: '$url'";
}
$self->protocol($protocol);
if (defined $username) {
$username =~ s/%([0-9A-Fa-f])/chr hex $1/eg;
$self->username($username);
}
else { $self->reset_username() }
if (defined $password) {
$password =~ s/%([0-9A-Fa-f])/chr hex $1/eg;
$self->password($password);
}
else { $self->reset_password() }
$self->host($server);
$port ? $self->port($port) : $self->reset_port;
$self->path($path);
defined $query_string ? $self->query_string($query_string) : $self->reset_parameters;
return 1;
}
sub protocol {
my $self = shift;
if (@_) {
my $protocol = shift;
croak "Protocol '$protocol' not supported" unless $self->protocol_supported($protocol);
$self->{protocol} = lc $protocol;
my $pkg = $PROTOCOL{$self->{protocol}};
bless $self, $pkg;
}
$self->{protocol};
}
sub protocol_supported {
my ($self, $protocol) = @_;
unless ($protocol) {
$self->debug("Protocol not supported: No protocol entered") if ref $self and $self->{debug};
return undef;
}
my $pkg = $PROTOCOL{lc $protocol};
unless ($pkg) {
$self->debug("Protocol '$pkg' not supported") if ref $self and $self->{debug};
return undef;
}
(my $mod = $pkg) =~ s|::|/|g;
$mod .= ".pm";
my $loaded_ok = eval { require $mod };
unless ($loaded_ok) {
$self->debug("Protocol '$pkg' not supported: require $mod failed: $@") if ref $self and $self->{debug};
return undef;
}
unless ($pkg->isa(__PACKAGE__)) {
$self->debug("Protocol '$pkg' not supported: does not inherit from " . __PACKAGE__) if ref $self and $self->{debug};
return undef;
}
return 1;
}
# Takes a host and port ("host:port") - returns true if valid in scalar
# context, (host, port) in list context, or undef if invalid.
sub valid_host {
my ($self, $host) = @_;
return $host && $host =~ /^$HOST_RE$/x;
}
sub host {
my $self = shift;
if (@_) {
my $host = shift;
$self->{host} = undef;
croak "Invalid hostname '$host' specified" unless ($host, my $port) = $self->valid_host($host);
$self->{host} = $host;
$port ? $self->port($port) : $self->reset_port;
}
$self->{host};
}
sub port {
my $self = shift;
if (@_) {
my $port = shift;
unless ($port and $port =~ /^[0-9]+$/) {
my $s = getservbyname($port) or croak "No such port: '$port'";
$port = $s->port;
}
$self->{port} = $port;
}
$self->{port} || $self->default_port;
}
sub post_data {
my $self = shift;
if (@_) {
$self->{post_data} = shift;
$self->debug("Setting post_data to '$self->{post_data}'") if $self->{debug} >= 2;
}
$self->{post_data};
}
sub reset_port {
my $self = shift;
delete $self->{port};
}
sub username {
my $self = shift;
if (@_) {
$self->{username} = shift;
$self->debug("Setting username to '$self->{username}'") if $self->{debug} >= 2;
}
$self->{username};
}
sub reset_username {
my $self = shift;
$self->debug("Username reset") if $self->{username} and $self->{debug} >= 2;
delete $self->{username};
}
sub password {
my $self = shift;
if (@_) {
$self->{password} = shift;
}
$self->{password};
}
sub reset_password {
my $self = shift;
delete $self->{password};
}
sub connection_timeout {
my $self = shift;
if (@_) {
$self->{conn_timeout} = shift;
}
exists $self->{conn_timeout} ? $self->{conn_timeout} : 10;
}
sub path {
my $self = shift;
if (@_) {
my $path = shift;
$path = '' unless defined $path;
$path =~ s/%([0-9A-Fa-f]{2})/chr hex $1/eg;
$path = '/' . $path unless substr($path, 0, 1) eq '/';
$self->{path} = $path;
}
$self->{path};
}
# Replaces the parameters hash after parsing the query string. Optionally takes
# a third argument - if true, the parameters are added, not replaced. Note that
# the query_string is only an interface to the parameters() method, and will be
# recreated before being sent to the server.
# Calling query_string without arguments in produces a query string (with
# necessary escaping) from all parameters that have been set.
sub query_string {
my $self = shift;
if (@_) {
my ($query, $add) = @_;
$query = '' if not defined $query; # An empty or undefined query string can be used to clear parameters
my $hsegment = '(?:[\w\$.+!*\'()|,;~&=:@-]|%[0-9A-Fa-f]{2})*';
my $qrelaxed = '(?:[^\x00-\x08\x0a-\x1f#%]|%[0-9A-Fa-f]{2})*';
$self->{strict} ? $query =~ /^$hsegment$/o : $query =~ /^$qrelaxed$/ or croak "Invalid query string '$query'";
unless ($add) {
$self->{params} = [];
}
for (split /[&;]/, $query) {
my @kv = split /=/, $_, 2;
$kv[0] =~ s/%([0-9A-Fa-f]{2})/chr hex $1/eg;
$kv[0] =~ y/+/ /;
if ($kv[1]) {
$kv[1] =~ s/%([0-9A-Fa-f]{2})/chr hex $1/eg;
$kv[1] =~ y/+/ /;
}
push @{$self->{params}}, @kv[0, 1]; # Will use "undef" for b in this case: a=1&b&c=2
}
return 1;
}
else {
my $ret = '';
return $ret if !$self->{params} or !@{$self->{params}};
for (my $i = 0; $i < @{$self->{params}}; $i += 2) {
my ($k, $v) = @{$self->{params}}[$i, $i + 1];
$k =~ s/([^\w. -])/sprintf "%%%02X", ord $1/eg;
$k =~ y/ /+/;
if ($v) {
$v =~ s/([^\w. -])/sprintf "%%%02X", ord $1/eg;
$v =~ y/ /+/;
}
$ret .= $k;
$ret .= "=$v" if defined $v;
$ret .= '&';
}
chop $ret;
return $ret;
}
}
sub parameters {
my $self = shift;
my @ret;
@ret = @{$self->{params}} if $self->{params} and defined wantarray;
if (@_) {
my @params = @_;
@params = @{$params[0]} if @params == 1 and ref $params[0] eq 'ARRAY';
my $add;
$add = pop @params if @params % 2;
if ($add) {
push @{$self->{params}}, @params;
}
else {
$self->{params} = \@params;
}
}
@ret;
}
sub reset_parameters {
my $self = shift;
delete $self->{params};
}
sub strict {
my $self = shift;
if (@_) {
$self->{strict} = shift() ? 1 : undef;
}
$self->{strict};
}
sub agent {
my $self = shift;
my $ret = $self->{agent};
if (@_) {
my $agent = shift;
$agent =~ /[\x00-\x08\x0a-\x1f\x7f]/ and croak "Invalid User-Agent '$agent'";
$self->{agent} = $agent;
}
unless ($ret) { $ret = $self->default_agent() }
else {
if ($ret =~ /[\s,:;]$/) { $ret .= $self->default_agent() }
if ($ret =~ /^[\s,:;]/) { $ret = $self->default_agent() . $ret }
}
$ret;
}
sub default_agent {
my $self = shift;
my $pkg = ref $self || $self || __PACKAGE__;
my $pstash;
if ($pstash = $::{"$pkg\::"} and $pstash->{VERSION} and my $scalar = *{\$pstash->{VERSION}}{SCALAR}) {
return "$pkg/$$scalar";
}
else {
return __PACKAGE__ . '/' . $VERSION;
}
}
sub chunk {
my $self = shift;
if (@_) {
my $coderef = shift;
ref $coderef eq 'CODE' or not defined $coderef or croak "Usage: \$www->chunk(CODEREF | undef)";
$self->{chunk_code} = $coderef;
return 1;
}
$self->{chunk_code};
}
sub chunk_size {
my $self = shift;
if (@_) {
my $chunk_size = shift;
defined $chunk_size and $chunk_size > 0
or croak 'Usage: $www->chunk_size(BYTES)';
$self->{chunk_size} = $chunk_size;
}
$self->{chunk_size};
}
sub debug {
my $self = @_ > 1 ? shift : __PACKAGE__;
$self = ref $self if ref $self;
carp "$self: @_";
}
sub debug_level {
my $self = shift;
if (@_) {
$self->{debug} = shift;
}
$self->{debug};
}
sub fatal_errors {
my $self = shift;
if (@_) {
$self->{fatal_errors} = shift;
}
$self->{fatal_errors};
}
sub error {
my $self = shift;
if (@_) {
$self->{error} = shift;
croak $self->{error} if $self->{fatal_errors};
$self->debug("Error: $self->{error}") if $self->{debug};
return undef;
}
$self->{error};
}
sub file {
shift if @_ and UNIVERSAL::isa($_[0], __PACKAGE__);
GT::WWW::File->new(@_);
}
package GT::WWW::File;
# This package is used when a protocol can take a file as input (for example,
# an HTTP POST file upload). To get a "file" object, call GT::WWW->file(params).
# The parameters are defined in the POD.
use Carp;
sub new {
my ($class, $filename, $handle) = @_;
my $self = {};
bless $self, ref($class) || $class;
defined $filename and $filename =~ m{([^\x00\x1f\x7f\\/]+)$} and $filename ne '.' and $filename ne '..'
or croak 'No (or invalid) filename specified. Usage: ' . __PACKAGE__ . '->new(FILENAME, PATH_OR_GLOBREF)';
$self->{filename} = $1;
if ($handle and ref $handle eq 'GLOB' and defined fileno($handle)) {
$self->{fh} = $handle;
}
else {
$handle = $filename if not defined $handle;
if ($handle) {
if (!-r $handle) {
croak "File specified ($handle) does not exist, or is not readable";
}
elsif (-d _) {
croak "File specified ($handle) is a directory";
}
else {
my $fh = \do { local *FH; *FH };
open $fh, "< $handle" or croak "Open to open file specified ($handle): $!";
binmode $fh;
$self->{fh} = $fh;
}
}
else {
croak 'No opened globref or filename specified. Usage: ' . __PACKAGE__ . '->new(FILENAME, PATH_OR_GLOBREF)';
}
}
$self;
}
sub fh {
my $self = shift;
return $self->{fh};
}
# Returns the size of the file, if available, or undef if it can't be
# determined (such as a socket, device (e.g. /dev/audio), or special file (e.g.
# /proc/cpuinfo)).
sub size {
my $self = shift;
if (-f $self->{fh} and my $size = -s _) {
return $size;
}
return undef;
}
sub filename {
my $self = shift;
$self->{filename};
}
1;
__END__
=head1 NAME
GT::WWW - Multi-protocol retrieving and posting, related in functionality to
LWP.
=head1 DESCRIPTION
GT::WWW is designed to provide a common interface for multiple protocols (as of
this writing, only HTTP and HTTPS, however others are planned) and handles
HEAD, GET, and POST requests. For non-HTTP-based protocols, what, exactly, a
"HEAD", "GET", or "POST" request is depends on the module in question. For
example, with FTP "GET" might download a file, while "POST" might upload one to
the server, and "HEAD" might return just the size of the file.
The modules under GT::WWW B<should not> be used directly; this module should be
used instead. The documentation here describes the options common to all
protocols - however you should check the POD of the protocol subclasses
(GT::WWW::http, GT::WWW::https, etc.) to see any extra options or methods that
those modules provide.
=head1 SYNOPSIS
Quick way:
use GT::WWW;
my $www = GT::WWW->get("http://server.com/page");
... = GT::WWW->post("http://server.com/page");
... = GT::WWW->head("http://server.com/page");
... = GT::WWW->...("http://user:pass@server.com/page");
... = GT::WWW->...("https://server.com/page");
# This query string will be parsed and passed as POST input:
... = GT::WWW->post("http://server.com/page?foo=bar;bar=foo");
Longer, but more capable way:
use GT::WWW;
my $request = GT::WWW->new();
$request->protocol("http");
$request->host("server.com");
$request->port(8080);
$request->path("/path/foo.cgi");
$request->username("user");
$request->password("pass");
$request->parameters(foo => "bar", bar => "foo");
equivelant to the above, using ->url():
$request->url("http://user:pass@server.com:8080/path/foo.cgi?foo=bar;bar=foo");
Now call $request->get(), $request->post(), or $request->head().
Very quick way to print a page:
perl -MGT::WWW=get -e 'print get("http://server.com/page?foo=bar&bar=foo")'
=head1 METHODS
Note that all methods that set values (such as host(), port(), etc.) also
return the value when called without any argument.
=head2 new
Call new() to get a new GT::WWW object. You can call it without arguments to
get a generic GT::WWW object, or use arguments as described below.
=over 4
=item URL
You can call new with a single scalar argument - a URL to be parsed. The URL is
of the same format as taken by the url() method.
=item HASH
You can alternatively call new() with a hash (or hash reference) of options.
Each of the methods described below can be passed in to new in the form of
C<key =E<gt> value> pairs - the methods will be called with the values
specified automatically.
=back
=head2 head
=head2 get
=head2 post
These are the methods used to tell the module to actually connect to the server
and download the requested page.
When used as GT::WWW class methods or function calls (but B<NOT> as methods on
GT::WWW objects or sub-objects), they take a single URL as an argument. This
call creates an internal GT::WWW object, turns on
L<C<fatal_errors(1)>|/fatal_errors>, passes the URL to L<C<url()>|/url>, then
calls the appropriate C<get()>, C<head()>, or C<post()> method of the resulting
protocol-specific object.
Note, however, that once you have specified a protocol (either via
L<C<protocol()>|/protocol>, or as part of a url passed to L<C<url()>|/url>)
your object ceases to be a GT::WWW object and becomes a protocol-specific
GT::WWW subclass. All subclasses provide their own versions of these methods.
The subclassed methods are not described here because they may not be supported
for all protocols, and their return value(s) may differ from one protocol to
the next. For more details, see the modules listed in the
L<SEE ALSO|/"SEE ALSO"> section.
Generally, get() and post() return an overloaded object that can be used as a
string to get the content (i.e. for printing), but see the notes in the CAVEATS
section of L<GT::WWW::http::Response> for anything more complicated than
concatenation or printing.
=head2 url
Takes a URL as argument. The URL is parsed into several fields: C<protocol>,
C<username>, C<password>, C<host>, C<port>, C<path>, and C<query_string>, then
each of those properties are set for the current object. Also note that
calling url() on an existing object resets the host, port, username, password,
and all parameters.
Interally, this method calls L<C<parse_url()>|/"parse_url">.
=head2 parse_url
Takes a URI, and returns the following 7 element list:
# 0 1 2 3 4 5 6
($protocol, $username, $password, $host, $port, $path, $query_string) =
GT::WWW->parse_url($url);
URL's require, at a minimum, protocol and host, in URI form:
PROTOCOL://HOST
The URL can extend up to:
PROTOCOL://USERNAME:PASSWORD@HOST:PORT/PATH?QUERY_STRING
Only protocols known to GT::WWW are acceptable. To check if a URL is valid,
check C<$protocol>.
This method can be called as a class or object method, but not as a function.
If called as an object method, the strict option as currently set for the
object will be used; as a class method or function, an optional second
parameter can be passed in - if true, strict query string parsing mode will be
enabled.
=head2 protocol
Takes a protocol, such as 'http', 'https', 'ftp', etc. Note that when you call
protocol, you object ceases being a GT::WWW object, by becoming a GT::WWW subclass
(such as GT::WWW::http, GT::WWW::https, etc.). Before trying an unknown protocol,
you should generally call the L</protocol_supported> method - calling
C<protocol(...)> with an unsupported protocol will result in a fatal error.
=head2 protocol_supported
This method takes a protocol, such as 'http', 'https', 'ftp', etc. In order to
make sure the protocol is supported, this checks to see that it is an
internally supported protocol, and also tries to load the module to make sure
that the module can be loaded.
=head2 valid_host
Returns true in scalar context if the host appears valid, or the host and port
in list context if the host is valid. Note that no check is performed to see
whether or not the host resolves or is reachable - this simply verifies that
the host is at least valid enough to warrant a lookup.
=head2 host
Sets the host, and optionally the port (assuming the argument is of the form:
'hostname:port'). Returns a fatal error if the host is not valid. Note that
setting the host will B<reset> the port to the protocol's default value, so
this method B<must> be called before port().
=head2 port
Sets the port for the connection. This can be a name, such as "smtp", or a
numeric value. Note that the port value B<will be reset> when the host()
method is called, so setting a port B<must> happen after setting the host.
=head2 reset_port
Resets the port so that the next request will use the default port.
=head2 username
Sets or retrieves the login username.
=head2 reset_username
Removes the login username.
=head2 password
Sets the login password.
=head2 reset_password
Removes the login password.
=head2 connection_timeout
Specifies a timeout for connections, in seconds. By default, a value of 10 is
used. If you specify a false value here, the connection time out will be
system dependent; typically this is from one to several minutes. Note,
however, that the timeout is not supported on Windows systems and so should not
be depended on in code that runs on Windows systems.
=head2 path
Sets the path for the request. Any HTTP escapes (e.g. %20) are automatically
converted to the actual value (e.g. " "). If required, the path will be
automatically re-escaped before being sent to the server.
=head2 parameters
Takes a list (not a hash, since duplicate keys are permitted) of key => value
pairs. Optionally takes an extra argument - if true, the parameters are added,
not replaced - if omitted (or false), any existing parameters are deleted.
To specify a valueless parameter without a value, such as b in this example
query string:
a=1&b&c=3
Pass undef as b's value. Passing "" as the value will result in:
a=1&b=&c=3
For example, to set to two query strings above would require the following two
sets of arguments, respectively:
$www->parameters(a => 1, b => undef, c => 3);
$www->parameters(a => 1, b => "", c => 3);
To then add a "d=4" parameter to either one, you would call:
$www->parameters(d => 4, 1);
Omitting the extra "1" would cause you to erase the previously set parameters.
Values specified should B<not> be URL encoded.
If called without arguments, the list of key/value pairs is returned.
=head2 reset_parameters
Resets the parameters. You want to make sure you do this between each request
on the same object, unless using L<C<url()>|/url>, which calls this for you.
=head2 query_string
This function serves the same purpose as L<C<parameters()>|/parameters>, except
that it takes a query string as input instead of a list. Like C<parameters()>,
the default behaviour is to replace any existing parameters unless a second,
true argument is provided.
Note that if you already have your parameters in some sort of list, it is
preferable to pass them to C<parameters()> than to join them into a query
string and pass them into this function, because this function just splits them
back up into a list again.
You can also provide a query string (along with a host, path, and possibly
other data) using the L<C<url()>|/url> method.
If called without arguments, the current parameters will be joined into a valid
query string and returned.
=head2 strict
This function is used to tell the GT::WWW object to allow/disallow
standard-violating responses. This has a global effect of allowing query
strings to contain _any_ characters except for "\r", "\n", and "#" - normally,
characters such as /, ?, and various extended characters much be escaped into
%XX format. The C<strict> option may have other protocol-specific effects,
which will be indicated in each protocol's documentation.
The option defaults to non-strict.
=head2 post_data
This function allows you to pass in raw data to be posted. The data will not be
encoded. If you pass in a code reference, the data will be posted in chunks.
=head2 agent
Used to set or retrieve the User-Agent string that will be sent to the server.
If the agent string you pass starts or ends with whitespace or a comma, the
default agent will be added at the beginning of end of the User-Agent string,
respectively. This value is only meaningful to protocols supporting something
similar to the HTTP User-Agent.
=head2 default_agent
Returns the default user agent string. This will be automatically used if no
agent has been set, or if an agent ending with whitespace is specified. This
value is dependent on the protocol being used, but is typically something like
"GT::WWW::http/1.23". This method is read-only.
=head2 chunk
=head2 chunk_size
C<chunk> and C<chunk_size> are used to perform a large download in chunks. The
C<chunk()> method takes a code reference that will be called when a chunk of
data has been retrieved from the server, or a value of C<undef> to clear any
currently set chunk code. C<chunk_size()> takes a integer containing the
number bytes that you wish to retrieve at a time from the server; the C<chunk>
code reference will be called with a scalar reference containing up to
C<chunk_size> bytes.
Note that when using chunked downloading, the data will not be available using
the normal content retrieval interface.
Also note that, as of 1.024, the chunk code reference only applies to the next
get() or post() request - after each get() or post() request, the chunk_code is
cleared (in order to avoid self-references and possible memory leaks).
=head2 cancel
=head2 cancelled
The C<cancel> method can be used in conjunction with the L<C<chunk>|/chunk>
option to abort a download in progress. The chunk code will not be called
again, and the server connection will be closed. This should be used sparingly
and with care. C<cancelled> simply return a true/false value indicating
whether the operation has been cancelled. This value is reset at the beginning
of each operation.
Note that cancelling an operation is never performed automatically, and only
happens - if ever - in the C<chunk> code reference, so checking the
cancellation status is rarely needed.
=head2 debug_level
This is used to set or retrieve the debug level.
0 = no debugging
1 = debugging related to current operation
2 = adds operation details to debugging level 1
3 = adds data debugging (very large!) to debugging level 2
When passed as part of a hash to new(), the key for this option can be specified
as C<debug> instead of C<debug_level>.
=head2 error
This method will return a string containing an error that has occurred. Note
that an error may be generated even for methods that _seem_ to be correct - for
example, if a server unexpectedly closes the connection before properly
finishing the transfer, a successful return will result since the transfer was
partially successful, but an error message will still be set.
=head2 fatal_errors
This method will alter the current object's error handling behaviour such that
any errors that occur will be propogated to fatal errors. It is enabled
automatically when using the quick (i.e. objectless) forms of C<get()>,
C<head()>, and C<post()> methods which have no associated object on which
->error can be called.
=head2 file
This method is used to create a parameter for uploading a file. It takes
either one or two arguments:
2 argument form:
First argument is a B<remote> filename, second argument is either a B<local>
filename, or a GLOB reference to an open filehandle.
1 argument form:
Argument is a filename to read.
Example usage:
my $file = $www->file("foo.txt");
$www->parameters(foobar => $file, 1);
my $response = $www->post();
This will upload the file from disk named "foo.txt", using a form parameter
named "foobar". This is similar to uploading a file named "foo.txt" via the
following HTML element:
<input type="file" name="foobar">
The two argument form with two filenames is used to lie to the server about the
actual name of the file. Using a filehandle as the second argument is for use
when a filename is not available - such as an opened socket, or a file that has
been opened elsewhere in the code.
Examples:
my $file = $www->file("foo.txt", "bar.txt");
my $file2 = $www->file("foo2.txt", \*FH);
$www->parameters(foobar => $file, foobar2 => $file2, 1);
my $response = $www->post();
This will upload two files - a file named F<foo.txt> (which is actually read
from the C<bar.txt> file) specified as form parameter C<foobar>, and a second
file, specified as parameter C<foobar2>, whose content is read from the
filehandle C<FH>.
=head1 SEE ALSO
L<GT::WWW::http>
L<GT::WWW::https>
=head1 MAINTAINER
Jason Rhinelander
=head1 COPYRIGHT
Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
http://www.gossamer-threads.com/
=head1 VERSION
Revision: $Id: WWW.pm,v 1.30 2009/06/09 00:57:25 brewt Exp $
=cut