# ==================================================================== # Gossamer Threads Module Library - http://gossamer-threads.com/ # # GT::WWW # Author: Jason Rhinelander # CVS Info : # $Id: WWW.pm,v 1.25 2005/04/08 19:25:31 jagerman Exp $ # # Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved. # ==================================================================== # # Description: # Implements retrieving and posting to WWW sites, using HTTP or HTTPS. HTTPS # support requires Net::SSLeay. # # GT::URI should be considered deprecated in favour of this module. # package GT::WWW; use strict; use Carp; use Symbol; use Net::servent; use vars qw/%PUBLIC $ERRORS %PROTOCOL $HOST_RE $VERSION @EXPORT_OK @ISA/; require Exporter; $VERSION = sprintf "%d.%03d", q$Revision: 1.25 $ =~ /(\d+)\.(\d+)/; @EXPORT_OK = qw/get post head getprint postprint/; @ISA = 'Exporter'; %PUBLIC = ( # Public methods, passable via new(). The values are the equivelant methods. server => 'server', port => 'port', path => 'path', username => 'username', password => 'password', header => 'header', parameters => 'parameters', query_string => 'query_string', debug => 'debug_level', debug_level => 'debug_level', agent => 'agent', chunk => 'chunk', chunk_size => 'chunk_size' ); %PROTOCOL = ( http => __PACKAGE__ . "::http", https => __PACKAGE__ . "::https", # ftp => __PACKAGE__ . "::ftp" ); # This must capture two and only two subpatterns - host and port. # The following _REQUIRES_ /x regex modifier! I'd use (?x), but it doesn't # work the same way in 5.004 that it does now. $HOST_RE = '( (?: \w(?:[-\w]*\w)? \. )* # "foo.foo." of "foo.foo.com" \w(?:[-\w]*\w)? # and the "com", or possibly just a host like "penguin" | (?:(?:[01]?\d?\d|2[0-4]\d|25[0-5])\.){3} # Match an IP, but only 0-255 in (?:[01]?\d?\d|2[0-4]\d|25[0-5]) # each field - nothing > 255 ) (?::(\d+)?)? # port - The RFC says: http://foo:/path is valid '; #use Socket; # GT::Socket's read/write methods aren't sufficient and are too # buggy. The only way to make use of it would be to use it to connect, then # directly access the filehandle, so instead of loading all the useless code, # GT::WWW modules should use GT::Socket::Client instead. sub new { my $class = shift; my $self = {}; bless $self, ref($class) || $class; if (@_) { if (ref $_[0] eq 'HASH' or @_ % 2 == 0) { my $input = ref $_[0] eq 'HASH' ? shift : {@_}; $self->debug_level(delete $input->{debug}) if exists $input->{debug}; # Handle the protocol before the other options, because some might be available to all protocols if (exists $input->{protocol}) { my $proto = delete $input->{protocol}; $self->protocol($proto); } for my $key (keys %$input) { croak "Invalid parameter '$key'" unless substr($key, 0, 1) ne '_' and $self->can($key); $self->$key($input->{$key}); } } elsif (@_ == 1 and not ref $_[0]) { $self->url(@_); } else { croak "Unknown arguments: @_"; } } $self->{debug} ||= 0; # So that: 'if $self->{debug} >= 2' won't generate warnings $self; } # Figures out the arguments, and returns a properly set up GT::WWW subclass. # Used by the quick get* and post* methods/functions below. sub _quick_args { (my $called_from = (caller(1))[3]) =~ s/.*:://; my ($class, $url); if (@_ == 0 or @_ == 1 and UNIVERSAL::isa($_[0], __PACKAGE__)) { # Subclasses override get(), so this is either a broken subclass or # calling get() on a GT::WWW object. croak "Usage: $called_from(URL), GT::WWW->$called_from(URL), or set a URL first"; # As soon as a URL is set on the object, the object is reblessed. } elsif (@_ == 1) { # function: (GT::WWW::)get(URL) ($class, $url) = (__PACKAGE__, shift); } elsif (@_ == 2) { if (ref $_[0] eq __PACKAGE__) { croak "Usage: GT::WWW->$called_from(URL) or $called_from(URL); \$gtwww->$called_from(URL) is not permitted"; } elsif (ref $_[0] or $_[0] ne __PACKAGE__) { # $other->get(URL) or Other->get(URL) - if it gets here, $other's # class is broken croak "Usage: GT::WWW->$called_from(URL) or $called_from(URL) (Subclass " . (ref $_[0] or $_[0]) . " is probably broken: ->$called_from() method came to GT::WWW)"; } else { ($class, $url) = (@_); } } else { croak "Usage: GT::WWW->$called_from(URL), \$gtwww->$called_from(URL), or $called_from(URL)"; } $url or croak 'Error: No URL specified'; $class->new({ fatal_errors => 1, url => $url }); } # This should be subclassed by all subclasses. The function below should be # called only as a function (perl -MGT::WWW=get -e 'print get("...url...")') sub get { my $www = &_quick_args; $www->get; } # Basically, this accomplishes the same as: print GT::WWW->get(URL), except # that it also sets up chunked downloading so that it can efficiently be used # with large pages. sub getprint { my $www = &_quick_args; $www->chunk(sub { print ${$_[0]} }); $www->chunk_size(4096); $www->get(); } sub post { my $www = &_quick_args; $www->post(); } sub postprint { my $www = &_quick_args; $www->chunk(sub { print ${$_[0]} }); $www->chunk_size(4096); $www->post(); } sub head { my $www = &_quick_args; $www->head; } sub cancel { } # No ops by default sub cancelled { } sub parse_url { my ($self, $url) = splice @_, 0, 2; my $strict = shift if not ref $self; my $valid_user = '(?:[\w\$.+!*\'()|,;?&=-]|%[0-9A-Fa-f]{2})+'; my $hsegment = '(?:[\w\$.+!*\'()|,;~&=:@-]|%[0-9A-Fa-f]{2})+'; my $hrelaxed = '(?:[^\x00-\x08\x0a-\x1f#%/?]|%[0-9A-Fa-f]{2})+'; my $qrelaxed = '(?:[^\x00-\x08\x0a-\x1f#%]|%[0-9A-Fa-f]{2})+'; my $proto_re = join "|", map quotemeta, keys %PROTOCOL; # The following regex is designed primarily for http and https URL's, and # thus allows things that don't necessarily make sense for other protocols. # i.e.: ftp://jagerman:password@my.server/foo/bar.txt?asdf=zxcv#anchor # ^^^^^^^^^^^^^^^^^ # This _could_ allow for protocol extensions, for example 'type=ascii' for # FTP requests. my ($protocol, $username, $password, $server, $port, $path, $query_string) = (ref $self ? $self->{strict} : $strict) ? $url =~ m( ^ ($proto_re) :// (?: ($valid_user) # username (?: :($valid_user)? # password )? @ )? $HOST_RE ( /(?:$hsegment(?:/(?:$hsegment)?)*)? # Match the path (/foo/bar.cgi) )? (?: \?($hsegment)? )? (?: \#.* # Allow a possible anchor - but we don't care about it )? $ )iox : $url =~ m( ^ ($proto_re) :// (?: ($valid_user) # username (?: :($valid_user)? # password )? @ )? $HOST_RE ( /(?:$hrelaxed(?:/(?:$hrelaxed)?)*)? # Match the path (/foo/bar.cgi) )? (?: \?($qrelaxed)? )? (?: \#.* # Allow a possible anchor - but we don't care about it )? $ )iox; return ($protocol, $username, $password, $server, $port, $path, $query_string); } sub url { my $self = shift; if (!@_) { my $proto = $self->protocol; my $host = $self->host; $proto and $host or return undef; my $url = "$proto://"; my $username = $self->username; my $password = $self->password; if ($username) { $url .= $username; $url .= ":$password" if $password; $url .= "@"; } $url .= "$host"; my $port = $self->port; $url .= ":$port" unless $port == $self->default_port; my $path = $self->path; $url .= $path if $path; my $query = $self->query_string; $url .= "?$query" if $query; return $url; } my $url = shift; my ($protocol, $username, $password, $server, $port, $path, $query_string) = $self->parse_url($url); unless ($protocol) { croak "Invalid URL: '$url'"; } $self->protocol($protocol); if (defined $username) { $username =~ s/%([0-9A-Fa-f])/chr hex $1/eg; $self->username($username); } else { $self->reset_username() } if (defined $password) { $password =~ s/%([0-9A-Fa-f])/chr hex $1/eg; $self->password($password); } else { $self->reset_password() } $self->host($server); $port ? $self->port($port) : $self->reset_port; $self->path($path); defined $query_string ? $self->query_string($query_string) : $self->reset_parameters; return 1; } sub protocol { my $self = shift; if (@_) { my $protocol = shift; croak "Protocol '$protocol' not supported" unless $self->protocol_supported($protocol); $self->{protocol} = lc $protocol; my $pkg = $PROTOCOL{$self->{protocol}}; bless $self, $pkg; } $self->{protocol}; } sub protocol_supported { my ($self, $protocol) = @_; unless ($protocol) { $self->debug("Protocol not supported: No protocol entered") if ref $self and $self->{debug}; return undef; } my $pkg = $PROTOCOL{lc $protocol}; unless ($pkg) { $self->debug("Protocol '$pkg' not supported") if ref $self and $self->{debug}; return undef; } (my $mod = $pkg) =~ s|::|/|g; $mod .= ".pm"; my $loaded_ok = eval { require $mod }; unless ($loaded_ok) { $self->debug("Protocol '$pkg' not supported: require $mod failed: $@") if ref $self and $self->{debug}; return undef; } unless ($pkg->isa(__PACKAGE__)) { $self->debug("Protocol '$pkg' not supported: does not inherit from " . __PACKAGE__) if ref $self and $self->{debug}; return undef; } return 1; } # Takes a host and port ("host:port") - returns true if valid in scalar # context, (host, port) in list context, or undef if invalid. sub valid_host { my ($self, $host) = @_; return $host && $host =~ /^$HOST_RE$/x; } sub host { my $self = shift; if (@_) { my $host = shift; $self->{host} = undef; croak "Invalid hostname '$host' specified" unless ($host, my $port) = $self->valid_host($host); $self->{host} = $host; $port ? $self->port($port) : $self->reset_port; } $self->{host}; } sub port { my $self = shift; if (@_) { my $port = shift; unless ($port and $port =~ /^[0-9]+$/) { my $s = getservbyname($port) or croak "No such port: '$port'"; $port = $s->port; } $self->{port} = $port; } $self->{port} || $self->default_port; } sub post_data { my $self = shift; if (@_) { $self->{post_data} = shift; $self->debug("Setting post_data to '$self->{post_data}'") if $self->{debug} >= 2; } $self->{post_data}; } sub reset_port { my $self = shift; delete $self->{port}; } sub username { my $self = shift; if (@_) { $self->{username} = shift; $self->debug("Setting username to '$self->{username}'") if $self->{debug} >= 2; } $self->{username}; } sub reset_username { my $self = shift; $self->debug("Username reset") if $self->{username} and $self->{debug} >= 2; delete $self->{username}; } sub password { my $self = shift; if (@_) { $self->{password} = shift; } $self->{password}; } sub reset_password { my $self = shift; delete $self->{password}; } sub connection_timeout { my $self = shift; if (@_) { $self->{conn_timeout} = shift; } exists $self->{conn_timeout} ? $self->{conn_timeout} : 10; } sub path { my $self = shift; if (@_) { my $path = shift; $path = '' unless defined $path; $path =~ s/%([0-9A-Fa-f]{2})/chr hex $1/eg; $path = '/' . $path unless substr($path, 0, 1) eq '/'; $self->{path} = $path; } $self->{path}; } # Replaces the parameters hash after parsing the query string. Optionally takes # a third argument - if true, the parameters are added, not replaced. Note that # the query_string is only an interface to the parameters() method, and will be # recreated before being sent to the server. # Calling query_string without arguments in produces a query string (with # necessary escaping) from all parameters that have been set. sub query_string { my $self = shift; if (@_) { my ($query, $add) = @_; $query = '' if not defined $query; # An empty or undefined query string can be used to clear parameters my $hsegment = '(?:[\w\$.+!*\'()|,;~&=:@-]|%[0-9A-Fa-f]{2})*'; my $qrelaxed = '(?:[^\x00-\x08\x0a-\x1f#%]|%[0-9A-Fa-f]{2})*'; $self->{strict} ? $query =~ /^$hsegment$/o : $query =~ /^$qrelaxed$/ or croak "Invalid query string '$query'"; unless ($add) { $self->{params} = []; } for (split /[&;]/, $query) { my @kv = split /=/, $_, 2; $kv[0] =~ s/%([0-9A-Fa-f]{2})/chr hex $1/eg; $kv[0] =~ y/+/ /; if ($kv[1]) { $kv[1] =~ s/%([0-9A-Fa-f]{2})/chr hex $1/eg; $kv[1] =~ y/+/ /; } push @{$self->{params}}, @kv[0, 1]; # Will use "undef" for b in this case: a=1&b&c=2 } return 1; } else { my $ret = ''; return $ret if !$self->{params} or !@{$self->{params}}; for (my $i = 0; $i < @{$self->{params}}; $i += 2) { my ($k, $v) = @{$self->{params}}[$i, $i + 1]; $k =~ s/([^\w. -])/sprintf "%%%02X", ord $1/eg; $k =~ y/ /+/; if ($v) { $v =~ s/([^\w. -])/sprintf "%%%02X", ord $1/eg; $v =~ y/ /+/; } $ret .= $k; $ret .= "=$v" if defined $v; $ret .= '&'; } chop $ret; return $ret; } } sub parameters { my $self = shift; my @ret; @ret = @{$self->{params}} if $self->{params} and defined wantarray; if (@_) { my @params = @_; @params = @{$params[0]} if @params == 1 and ref $params[0] eq 'ARRAY'; my $add; $add = pop @params if @params % 2; if ($add) { push @{$self->{params}}, @params; } else { $self->{params} = \@params; } } @ret; } sub reset_parameters { my $self = shift; delete $self->{params}; } sub strict { my $self = shift; if (@_) { $self->{strict} = shift() ? 1 : undef; } $self->{strict}; } sub agent { my $self = shift; my $ret = $self->{agent}; if (@_) { my $agent = shift; $agent =~ /[\x00-\x08\x0a-\x1f\x7f]/ and croak "Invalid User-Agent '$agent'"; $self->{agent} = $agent; } unless ($ret) { $ret = $self->default_agent() } else { if ($ret =~ /[\s,:;]$/) { $ret .= $self->default_agent() } if ($ret =~ /^[\s,:;]/) { $ret = $self->default_agent() . $ret } } $ret; } sub default_agent { my $self = shift; my $pkg = ref $self || $self || __PACKAGE__; my $pstash; if ($pstash = $::{"$pkg\::"} and $pstash->{VERSION} and my $scalar = *{\$pstash->{VERSION}}{SCALAR}) { return "$pkg/$$scalar"; } else { return __PACKAGE__ . '/' . $VERSION; } } sub chunk { my $self = shift; if (@_) { my $coderef = shift; ref $coderef eq 'CODE' or not defined $coderef or croak "Usage: \$www->chunk(CODEREF | undef)"; $self->{chunk_code} = $coderef; return 1; } $self->{chunk_code}; } sub chunk_size { my $self = shift; if (@_) { my $chunk_size = shift; defined $chunk_size and $chunk_size > 0 or croak 'Usage: $www->chunk_size(BYTES)'; $self->{chunk_size} = $chunk_size; } $self->{chunk_size}; } sub debug { my $self = @_ > 1 ? shift : __PACKAGE__; $self = ref $self if ref $self; carp "$self: @_"; } sub debug_level { my $self = shift; if (@_) { $self->{debug} = shift; } $self->{debug}; } sub fatal_errors { my $self = shift; if (@_) { $self->{fatal_errors} = shift; } $self->{fatal_errors}; } sub error { my $self = shift; if (@_) { $self->{error} = shift; croak $self->{error} if $self->{fatal_errors}; $self->debug("Error: $self->{error}") if $self->{debug}; return undef; } $self->{error}; } sub file { shift if @_ and UNIVERSAL::isa($_[0], __PACKAGE__); GT::WWW::File->new(@_); } package GT::WWW::File; # This package is used when a protocol can take a file as input (for example, # an HTTP POST file upload). To get a "file" object, call GT::WWW->file(params). # The parameters are defined in the POD. use Carp; sub new { my ($class, $filename, $handle) = @_; my $self = {}; bless $self, ref($class) || $class; defined $filename and $filename =~ m{([^\x00\x1f\x7f\\/]+)$} and $filename ne '.' and $filename ne '..' or croak 'No (or invalid) filename specified. Usage: ' . __PACKAGE__ . '->new(FILENAME, PATH_OR_GLOBREF)'; $self->{filename} = $1; if ($handle and ref $handle eq 'GLOB' and fileno($handle)) { $self->{fh} = $handle; } else { $handle = $filename if not defined $handle; if ($handle) { if (!-r $handle) { croak "File specified ($handle) does not exist, or is not readable"; } elsif (-d _) { croak "File specified ($handle) is a directory"; } else { my $fh = \do { local *FH; *FH }; open $fh, "< $handle" or croak "Open to open file specified ($handle): $!"; binmode $fh; $self->{fh} = $fh; } } else { croak 'No opened globref or filename specified. Usage: ' . __PACKAGE__ . '->new(FILENAME, PATH_OR_GLOBREF)'; } } $self; } sub fh { my $self = shift; return $self->{fh}; } # Returns the size of the file, if available, or undef if it can't be # determined (such as a socket, device (e.g. /dev/audio), or special file (e.g. # /proc/cpuinfo)). sub size { my $self = shift; if (-f $self->{fh} and my $size = -s _) { return $size; } return undef; } sub filename { my $self = shift; $self->{filename}; } 1; __END__ =head1 NAME GT::WWW - Multi-protocol retrieving and posting, related in functionality to LWP. =head1 DESCRIPTION GT::WWW is designed to provide a common interface for multiple protocols (as of this writing, only HTTP and HTTPS, however others are planned) and handles HEAD, GET, and POST requests. For non-HTTP-based protocols, what, exactly, a "HEAD", "GET", or "POST" request is depends on the module in question. For example, with FTP "GET" might download a file, while "POST" might upload one to the server, and "HEAD" might return just the size of the file. The modules under GT::WWW B be used directly; this module should be used instead. The documentation here describes the options common to all protocols - however you should check the POD of the protocol subclasses (GT::WWW::http, GT::WWW::https, etc.) to see any extra options or methods that those modules provide. =head1 SYNOPSIS Quick way: use GT::WWW; my $www = GT::WWW->get("http://server.com/page"); ... = GT::WWW->post("http://server.com/page"); ... = GT::WWW->head("http://server.com/page"); ... = GT::WWW->...("http://user:pass@server.com/page"); ... = GT::WWW->...("https://server.com/page"); # This query string will be parsed and passed as POST input: ... = GT::WWW->post("http://server.com/page?foo=bar;bar=foo"); Longer, but more capable way: use GT::WWW; my $request = GT::WWW->new(); $request->protocol("http"); $request->host("server.com"); $request->port(8080); $request->path("/path/foo.cgi"); $request->username("user"); $request->password("pass"); $request->parameters(foo => "bar", bar => "foo"); equivelant to the above, using ->url(): $request->url("http://user:pass@server.com:8080/path/foo.cgi?foo=bar;bar=foo"); Now call $request->get(), $request->post(), or $request->head(). Very quick way to print a page: perl -MGT::WWW=get -e 'print get("http://server.com/page?foo=bar&bar=foo")' =head1 METHODS Note that all methods that set values (such as host(), port(), etc.) also return the value when called without any argument. =head2 new Call new() to get a new GT::WWW object. You can call it without arguments to get a generic GT::WWW object, or use arguments as described below. =over 4 =item URL You can call new with a single scalar argument - a URL to be parsed. The URL is of the same format as taken by the url() method. =item HASH You can alternatively call new() with a hash (or hash reference) of options. Each of the methods described below can be passed in to new in the form of C value> pairs - the methods will be called with the values specified automatically. =back =head2 head =head2 get =head2 post These are the methods used to tell the module to actually connect to the server and download the requested page. When used as GT::WWW class methods or function calls (but B as methods on GT::WWW objects or sub-objects), they take a single URL as an argument. This call creates an internal GT::WWW object, turns on L|/fatal_errors>, passes the URL to L|/url>, then calls the appropriate C, C, or C method of the resulting protocol-specific object. Note, however, that once you have specified a protocol (either via L|/protocol>, or as part of a url passed to L|/url>) your object ceases to be a GT::WWW object and becomes a protocol-specific GT::WWW subclass. All subclasses provide their own versions of these methods. The subclassed methods are not described here because they may not be supported for all protocols, and their return value(s) may differ from one protocol to the next. For more details, see the modules listed in the L section. Generally, get() and post() return an overloaded object that can be used as a string to get the content (i.e. for printing), but see the notes in the CAVEATS section of L for anything more complicated than concatenation or printing. =head2 url Takes a URL as argument. The URL is parsed into several fields: C, C, C, C, C, C, and C, then each of those properties are set for the current object. Also note that calling url() on an existing object resets the host, port, username, password, and all parameters. Interally, this method calls L|/"parse_url">. =head2 parse_url Takes a URI, and returns the following 7 element list: # 0 1 2 3 4 5 6 ($protocol, $username, $password, $host, $port, $path, $query_string) = GT::WWW->parse_url($url); URL's require, at a minimum, protocol and host, in URI form: PROTOCOL://HOST The URL can extend up to: PROTOCOL://USERNAME:PASSWORD@HOST:PORT/PATH?QUERY_STRING Only protocols known to GT::WWW are acceptable. To check if a URL is valid, check C<$protocol>. This method can be called as a class or object method, but not as a function. If called as an object method, the strict option as currently set for the object will be used; as a class method or function, an optional second parameter can be passed in - if true, strict query string parsing mode will be enabled. =head2 protocol Takes a protocol, such as 'http', 'https', 'ftp', etc. Note that when you call protocol, you object ceases being a GT::WWW object, by becoming a GT::WWW subclass (such as GT::WWW::http, GT::WWW::https, etc.). Before trying an unknown protocol, you should generally call the L method - calling C with an unsupported protocol will result in a fatal error. =head2 protocol_supported This method takes a protocol, such as 'http', 'https', 'ftp', etc. In order to make sure the protocol is supported, this checks to see that it is an internally supported protocol, and also tries to load the module to make sure that the module can be loaded. =head2 valid_host Returns true in scalar context if the host appears valid, or the host and port in list context if the host is valid. Note that no check is performed to see whether or not the host resolves or is reachable - this simply verifies that the host is at least valid enough to warrant a lookup. =head2 host Sets the host, and optionally the port (assuming the argument is of the form: 'hostname:port'). Returns a fatal error if the host is not valid. Note that setting the host will B the port to the protocol's default value, so this method B be called before port(). =head2 port Sets the port for the connection. This can be a name, such as "smtp", or a numeric value. Note that the port value B when the host() method is called, so setting a port B happen after setting the host. =head2 reset_port Resets the port so that the next request will use the default port. =head2 username Sets or retrieves the login username. =head2 reset_username Removes the login username. =head2 password Sets the login password. =head2 reset_password Removes the login password. =head2 connection_timeout Specifies a timeout for connections, in seconds. By default, a value of 10 is used. If you specify a false value here, the connection time out will be system dependent; typically this is from one to several minutes. Note, however, that the timeout is not supported on Windows systems and so should not be depended on in code that runs on Windows systems. =head2 path Sets the path for the request. Any HTTP escapes (e.g. %20) are automatically converted to the actual value (e.g. " "). If required, the path will be automatically re-escaped before being sent to the server. =head2 parameters Takes a list (not a hash, since duplicate keys are permitted) of key => value pairs. Optionally takes an extra argument - if true, the parameters are added, not replaced - if omitted (or false), any existing parameters are deleted. To specify a valueless parameter without a value, such as b in this example query string: a=1&b&c=3 Pass undef as b's value. Passing "" as the value will result in: a=1&b=&c=3 For example, to set to two query strings above would require the following two sets of arguments, respectively: $www->parameters(a => 1, b => undef, c => 3); $www->parameters(a => 1, b => "", c => 3); To then add a "d=4" parameter to either one, you would call: $www->parameters(d => 4, 1); Omitting the extra "1" would cause you to erase the previously set parameters. Values specified should B be URL encoded. If called without arguments, the list of key/value pairs is returned. =head2 reset_parameters Resets the parameters. You want to make sure you do this between each request on the same object, unless using L|/url>, which calls this for you. =head2 query_string This function serves the same purpose as L|/parameters>, except that it takes a query string as input instead of a list. Like C, the default behaviour is to replace any existing parameters unless a second, true argument is provided. Note that if you already have your parameters in some sort of list, it is preferable to pass them to C than to join them into a query string and pass them into this function, because this function just splits them back up into a list again. You can also provide a query string (along with a host, path, and possibly other data) using the L|/url> method. If called without arguments, the current parameters will be joined into a valid query string and returned. =head2 strict This function is used to tell the GT::WWW object to allow/disallow standard-violating responses. This has a global effect of allowing query strings to contain _any_ characters except for "\r", "\n", and "#" - normally, characters such as /, ?, and various extended characters much be escaped into %XX format. The C option may have other protocol-specific effects, which will be indicated in each protocol's documentation. The option defaults to non-strict. =head2 post_data This function allows you to pass in raw data to be posted. The data will not be encoded. If you pass in a code reference, the data will be posted in chunks. =head2 agent Used to set or retrieve the User-Agent string that will be sent to the server. If the agent string you pass starts or ends with whitespace or a comma, the default agent will be added at the beginning of end of the User-Agent string, respectively. This value is only meaningful to protocols supporting something similar to the HTTP User-Agent. =head2 default_agent Returns the default user agent string. This will be automatically used if no agent has been set, or if an agent ending with whitespace is specified. This value is dependent on the protocol being used, but is typically something like "GT::WWW::http/1.23". This method is read-only. =head2 chunk =head2 chunk_size C and C are used to perform a large download in chunks. The C method takes a code reference that will be called when a chunk of data has been retrieved from the server, or a value of C to clear any currently set chunk code. C takes a integer containing the number bytes that you wish to retrieve at a time from the server; the C code reference will be called with a scalar reference containing up to C bytes. Note that when using chunked downloading, the data will not be available using the normal content retrieval interface. Also note that, as of 1.024, the chunk code reference only applies to the next get() or post() request - after each get() or post() request, the chunk_code is cleared (in order to avoid self-references and possible memory leaks). =head2 cancel =head2 cancelled The C method can be used in conjunction with the L|/chunk> option to abort a download in progress. The chunk code will not be called again, and the server connection will be closed. This should be used sparingly and with care. C simply return a true/false value indicating whether the operation has been cancelled. This value is reset at the beginning of each operation. Note that cancelling an operation is never performed automatically, and only happens - if ever - in the C code reference, so checking the cancellation status is rarely needed. =head2 debug_level This is used to set or retrieve the debug level. 0 = no debugging 1 = debugging related to current operation 2 = adds operation details to debugging level 1 3 = adds data debugging (very large!) to debugging level 2 When passed as part of a hash to new(), the key for this option can be specified as C instead of C. =head2 error This method will return a string containing an error that has occured. Note that an error may be generated even for methods that _seem_ to be correct - for example, if a server unexpectedly closes the connection before properly finishing the transfer, a successful return will result since the transfer was partially successful, but an error message will still be set. =head2 fatal_errors This method will alter the current object's error handling behaviour such that any errors that occur will be propogated to fatal errors. It is enabled automatically when using the quick (i.e. objectless) forms of C, C, and C methods which have no associated object on which ->error can be called. =head2 file This method is used to create a parameter for uploading a file. It takes either one or two arguments: 2 argument form: First argument is a B filename, second argument is either a B filename, or a GLOB reference to an open filehandle. 1 argument form: Argument is a filename to read. Example usage: my $file = $www->file("foo.txt"); $www->parameters(foobar => $file, 1); my $response = $www->post(); This will upload the file from disk named "foo.txt", using a form parameter named "foobar". This is similar to uploading a file named "foo.txt" via the following HTML element: The two argument form with two filenames is used to lie to the server about the actual name of the file. Using a filehandle as the second argument is for use when a filename is not available - such as an opened socket, or a file that has been opened elsewhere in the code. Examples: my $file = $www->file("foo.txt", "bar.txt"); my $file2 = $www->file("foo2.txt", \*FH); $www->parameters(foobar => $file, foobar2 => $file2, 1); my $response = $www->post(); This will upload two files - a file named F (which is actually read from the C file) specified as form parameter C, and a second file, specified as parameter C, whose content is read from the filehandle C. =head1 SEE ALSO L L =head1 MAINTAINER Jason Rhinelander =head1 COPYRIGHT Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved. http://www.gossamer-threads.com/ =head1 VERSION Revision: $Id: WWW.pm,v 1.25 2005/04/08 19:25:31 jagerman Exp $ =cut