1431 lines
51 KiB
Perl
1431 lines
51 KiB
Perl
|
# ====================================================================
|
||
|
# Gossamer Threads Module Library - http://gossamer-threads.com/
|
||
|
#
|
||
|
# GT::WWW::http
|
||
|
# Author: Jason Rhinelander
|
||
|
# CVS Info : 087,071,086,086,085
|
||
|
# $Id: http.pm,v 1.31 2005/04/08 19:20:00 jagerman Exp $
|
||
|
#
|
||
|
# Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
|
||
|
# ====================================================================
|
||
|
#
|
||
|
# Description:
|
||
|
# Handles HTTP GT::WWW connections
|
||
|
#
|
||
|
|
||
|
package GT::WWW::http;
|
||
|
use strict;
|
||
|
use Carp;
|
||
|
use GT::Socket::Client qw/:crlf/;
|
||
|
use GT::WWW::http::Response;
|
||
|
use GT::WWW::http::Header;
|
||
|
|
||
|
use vars qw/@ISA $VERSION/;
|
||
|
@ISA = 'GT::WWW';
|
||
|
$VERSION = sprintf "%d.%03d", q$Revision: 1.31 $ =~ /(\d+)\.(\d+)/;
|
||
|
|
||
|
use constants
|
||
|
BUFLENGTH => 4096, # The amount to read at a time from files to upload
|
||
|
CHUNK_SIZE => 64 * 1024; # The default maximum chunk size, if not specified
|
||
|
|
||
|
my $valid_username = '\x00-\x08\x0a-\x1f\x7f';
|
||
|
|
||
|
# Find out if Compress::Zlib of at least 1.14 is available. Versions prior to
|
||
|
# 1.14 had a "memory overwrite" bug in inflate(), which this module uses.
|
||
|
my $have_zlib = eval { require Compress::Zlib; Compress::Zlib->require_version(1.14) }
|
||
|
and import Compress::Zlib;
|
||
|
|
||
|
sub get {
|
||
|
my $self = shift;
|
||
|
|
||
|
$self->{redirected} = {};
|
||
|
$self->{redirect} = undef;
|
||
|
|
||
|
my $ret = $self->_get;
|
||
|
$self->chunk(undef);
|
||
|
$ret;
|
||
|
}
|
||
|
|
||
|
sub _get { # May recurse, if a 3xx status is encountered
|
||
|
my ($self, $head) = @_;
|
||
|
$self->{port} ||= $self->default_port;
|
||
|
$self->{host} or croak 'You must set a host before calling ' . ($head ? 'head()' : 'get()');
|
||
|
$self->{cancelled} = undef;
|
||
|
|
||
|
my $request_header = $self->header();
|
||
|
$request_header->replace_header(
|
||
|
Host => ($self->{port} == 80 ? $self->{host} : "$self->{host}:$self->{port}")
|
||
|
);
|
||
|
|
||
|
$self->_set_basic_headers;
|
||
|
|
||
|
CONNECT: {
|
||
|
$self->debug("Connecting...") if $self->{debug} >= 2;
|
||
|
$self->_connect or return undef; # _connect generates the error
|
||
|
$self->debug("Connected.") if $self->{debug} >= 2;
|
||
|
|
||
|
$self->{path} = '/' unless defined $self->{path} and length $self->{path};
|
||
|
my $path = $self->_escape_path;
|
||
|
|
||
|
if (my $query = $self->query_string) {
|
||
|
$path .= "?$query";
|
||
|
}
|
||
|
|
||
|
my $header = ($head ? "HEAD" : "GET") . " $path HTTP/";
|
||
|
$header .= $self->{http_10} ? "1.0" : "1.1";
|
||
|
$header .= CRLF;
|
||
|
|
||
|
$header .= $request_header;
|
||
|
|
||
|
$self->debug("Sending header:\n$header") if $self->{debug} >= 2;
|
||
|
$self->send($header)
|
||
|
or return $self->error("Could not write to the server: $!");
|
||
|
$self->debug("Header sent") if $self->{debug};
|
||
|
|
||
|
$self->debug("Reading header") if $self->{debug} >= 2;
|
||
|
my $read_header = $self->_read_header;
|
||
|
if (!$read_header and $head and $self->{reused}) {
|
||
|
$self->debug("Assuming broken server sending a body for a HEAD response; reconnecting") if $self->{debug};
|
||
|
delete $self->{sock};
|
||
|
delete $self->{reused};
|
||
|
redo CONNECT;
|
||
|
}
|
||
|
$read_header or return;
|
||
|
|
||
|
$self->debug($self->{debug} >= 2 ? "Read header: $self->{response}->{header}" : "Read header") if $self->{debug};
|
||
|
}
|
||
|
|
||
|
unless ($head or $self->response->status == 304) {
|
||
|
$self->debug("Reading body") if $self->{debug};
|
||
|
$self->_read_body or return;
|
||
|
$self->debug("Read body") if $self->{debug};
|
||
|
if ($self->{cancelled}) {
|
||
|
$self->debug("Closing socket (action cancelled in mid-download)") if $self->{debug} >= 2;
|
||
|
close $self->{sock};
|
||
|
$self->{connected} = undef;
|
||
|
}
|
||
|
}
|
||
|
elsif (!$head and $self->{debug} >= 2) {
|
||
|
$self->debug("Not reading body: Server returned status: 304 " . $self->response->status);
|
||
|
}
|
||
|
|
||
|
my $code = int($self->response->status);
|
||
|
if ($code == 300 or $code == 301 or $code == 302 or $code == 303 or $code == 307) {
|
||
|
$self->debug("Server returned status $code - redirect possible") if $self->{debug};
|
||
|
unless ($self->{no_redirect}) {
|
||
|
if (my $location = $self->response->header("Location")) {
|
||
|
my ($protocol, $username, $password, $host, $port, $path, $query_string) =
|
||
|
$self->parse_url($location);
|
||
|
|
||
|
if (!$protocol and substr($location, 0, 1) eq '/') {
|
||
|
$self->debug("Server returned standard-violating non-absolute Location: $location") if $self->{debug};
|
||
|
if ($self->{strict}) {
|
||
|
$self->debug("Strict mode enabled; not following local redirect") if $self->{debug};
|
||
|
}
|
||
|
else {
|
||
|
my $hrelaxed = '(?:[^\x00-\x08\x0a-\x1f#%/?]|%[0-9A-Fa-f]{2})+';
|
||
|
my $qrelaxed = '(?:[^\x00-\x08\x0a-\x1f#%]|%[0-9A-Fa-f]{2})+';
|
||
|
if ($location =~ m(
|
||
|
^
|
||
|
(
|
||
|
/(?:$hrelaxed(?:/(?:$hrelaxed)?)*)? # Match the path (/foo/bar.cgi)
|
||
|
)
|
||
|
(?:
|
||
|
\?($qrelaxed)?
|
||
|
)?
|
||
|
(?:
|
||
|
\#.* # Allow a possible anchor - but we don't care about it
|
||
|
)?
|
||
|
$
|
||
|
)iox) {
|
||
|
$self->debug("Strict mode not enabled; following local redirect") if $self->{debug};
|
||
|
$protocol = $self->protocol;
|
||
|
$host = $self->host;
|
||
|
$port = $self->port;
|
||
|
$path = $1;
|
||
|
$query_string = $2;
|
||
|
}
|
||
|
elsif ($self->{debug}) {
|
||
|
$self->debug("Unable to extract relative URL from '$location'");
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if ($protocol and $protocol eq $self->protocol and not $self->{redirected}->{$location}++) {
|
||
|
$self->debug("Redirecting to: $location") if $self->{debug};
|
||
|
|
||
|
push @{$self->{redirect}}, $self->{response};
|
||
|
$self->host($host);
|
||
|
$self->port($port) if $port;
|
||
|
if ($username) {
|
||
|
$self->username($username);
|
||
|
$self->password($password) if $self->password;
|
||
|
}
|
||
|
elsif ($self->{debug} >= 2) {
|
||
|
$self->debug("Not resetting username/password for redirect");
|
||
|
}
|
||
|
$self->path($path);
|
||
|
$self->query_string($query_string);
|
||
|
|
||
|
return $self->_get($head);
|
||
|
}
|
||
|
elsif ($self->{debug}) {
|
||
|
if ($protocol ne $self->protocol) {
|
||
|
$self->debug("Not redirecting: Unable to redirect to a different protocol");
|
||
|
}
|
||
|
elsif ($protocol) {
|
||
|
$self->debug("Not redirecting: Redirect loop detected");
|
||
|
}
|
||
|
else {
|
||
|
$self->debug("Not redirecting: '$location' is not valid (or not supported by GT::WWW)");
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
elsif ($self->{debug}) {
|
||
|
$self->debug("Not redirecting: server header did not include a Location");
|
||
|
}
|
||
|
}
|
||
|
elsif ($self->{debug}) {
|
||
|
$self->debug("Not redirecting - no_redirect option is enabled");
|
||
|
}
|
||
|
}
|
||
|
return $self->response if defined wantarray;
|
||
|
}
|
||
|
|
||
|
sub post {
|
||
|
my $self = shift;
|
||
|
$self->{port} ||= $self->default_port;
|
||
|
$self->{host} or croak 'You must set a host before calling post()';
|
||
|
$self->{cancelled} = undef;
|
||
|
|
||
|
my $request_header = $self->header;
|
||
|
$request_header->replace_header(
|
||
|
Host => ($self->{port} == 80 ? $self->{host} : "$self->{host}:$self->{port}")
|
||
|
);
|
||
|
|
||
|
my @param = $self->parameters;
|
||
|
my $file_upload;
|
||
|
for (my $i = 1; $i < @param; $i += 2) {
|
||
|
if ($param[$i] and UNIVERSAL::isa($param[$i], 'GT::WWW::File')) {
|
||
|
$file_upload++;
|
||
|
last;
|
||
|
}
|
||
|
}
|
||
|
my (@ct) = $request_header->header_words('Content-Type');
|
||
|
unless (@ct) {
|
||
|
if ($file_upload) {
|
||
|
@ct = ('multipart/form-data' => undef);
|
||
|
}
|
||
|
else {
|
||
|
@ct = ('application/x-www-form-urlencoded' => undef);
|
||
|
}
|
||
|
$request_header->replace_header('Content-Type' => \@ct);
|
||
|
}
|
||
|
elsif ($ct[0] eq 'form-data') {
|
||
|
$ct[0] = 'multipart/form-data';
|
||
|
$request_header->replace_header('Content-Type' => \@ct);
|
||
|
}
|
||
|
|
||
|
my ($content, $chunked);
|
||
|
|
||
|
my @transfer_encoding = $request_header->header_words('Transfer-Encoding');
|
||
|
for (my $i = 0; $i < @transfer_encoding; $i += 2) {
|
||
|
if ($transfer_encoding[$i] eq 'chunked') {
|
||
|
$chunked++;
|
||
|
splice @transfer_encoding, $i, 2;
|
||
|
$i -= 2;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (@param) {
|
||
|
my %ct = @ct;
|
||
|
if (exists $ct{'multipart/form-data'}) {
|
||
|
my ($boundary, $boundary_i);
|
||
|
for (my $i = 0; $i < @ct; $i += 2) {
|
||
|
if (lc $ct[$i] eq 'boundary') {
|
||
|
$boundary = $ct[$boundary_i = $i + 1];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
($content, $boundary, my $content_length) = $self->_form_data($boundary);
|
||
|
# N.B.: The RFC recommends that boundary NOT be quoted, which means "token" characters only
|
||
|
if ($boundary_i) {
|
||
|
$ct[$boundary_i] = $boundary;
|
||
|
}
|
||
|
else {
|
||
|
push @ct, boundary => $boundary;
|
||
|
}
|
||
|
$request_header->replace_header('Content-Type' => \@ct);
|
||
|
|
||
|
$content_length or $chunked++; # If the content length cannot be obtained, we have to use chunked encoding
|
||
|
}
|
||
|
else {
|
||
|
$content = $self->query_string;
|
||
|
$request_header->replace_header('Content-Length' => length($content));
|
||
|
}
|
||
|
}
|
||
|
elsif (my $data = $self->post_data) {
|
||
|
$content = $data;
|
||
|
$chunked = 1 if ref $data;
|
||
|
$request_header->replace_header('Content-Length' => length($content)) if (! ref $data);
|
||
|
}
|
||
|
|
||
|
if ($chunked) {
|
||
|
unshift @transfer_encoding, chunked => undef;
|
||
|
$request_header->replace_header('Transfer-Encoding' => \@transfer_encoding);
|
||
|
}
|
||
|
|
||
|
$self->_set_basic_headers;
|
||
|
|
||
|
$self->debug("Connecting...") if $self->{debug} >= 2;
|
||
|
$self->_connect or return undef;
|
||
|
$self->debug("Connected.") if $self->{debug} >= 2;
|
||
|
|
||
|
$self->{path} = '/' unless defined $self->{path} and length $self->{path};
|
||
|
my $path = $self->_escape_path;
|
||
|
|
||
|
my $header = "POST $path HTTP/";
|
||
|
$header .= $self->{http_10} ? "1.0" : "1.1";
|
||
|
$header .= CRLF;
|
||
|
$header .= $request_header;
|
||
|
|
||
|
$self->debug("Sending header:\n$header") if $self->{debug} >= 2;
|
||
|
$self->send($header)
|
||
|
or return $self->error("Could not write to the server: $!");
|
||
|
$self->debug("Header sent") if $self->{debug} >= 2;
|
||
|
|
||
|
if ($request_header->contains(Expect => '100-continue')) {
|
||
|
$self->_read_header(1) or return;
|
||
|
my $response = $self->response;
|
||
|
int($response->status) == 100 or return $response;
|
||
|
}
|
||
|
|
||
|
if (ref $content) { # Content is either a string, or a code ref
|
||
|
$self->debug("Sending content to server using chunked encoding") if $self->{debug} >= 2;
|
||
|
while (defined(my $c = $content->())) {
|
||
|
if ($chunked) {
|
||
|
$self->debug(sprintf("Chunk: %x", length($c))) if $self->{debug} >= 3;
|
||
|
$self->send(sprintf("%x$CRLF", length($c)));
|
||
|
}
|
||
|
$self->debug("Content:\n$c\n") if $self->{debug} >= 3;
|
||
|
$self->send($c);
|
||
|
}
|
||
|
$self->debug("Chunked content sent") if $self->{debug} >= 2;
|
||
|
}
|
||
|
elsif (defined $content) {
|
||
|
$self->debug("Sending content") if $self->{debug} >= 2;
|
||
|
$self->debug("Content:\n$content\n") if $self->{debug} >= 3;
|
||
|
$self->send($content);
|
||
|
$self->debug("Content sent") if $self->{debug} >= 2;
|
||
|
}
|
||
|
|
||
|
$self->debug("Reading header") if $self->{debug} >= 2;
|
||
|
$self->_read_header or return;
|
||
|
$self->debug("Read header: $self->{response}->{header}") if $self->{debug} >= 2;
|
||
|
$self->debug("Reading body") if $self->{debug} >= 2;
|
||
|
$self->_read_body or return;
|
||
|
$self->debug("Read body") if $self->{debug} >= 2;
|
||
|
if ($self->{cancelled}) {
|
||
|
$self->debug("Closing socket (action cancelled in mid-download)") if $self->{debug} >= 2;
|
||
|
close $self->{sock};
|
||
|
$self->{connected} = undef;
|
||
|
}
|
||
|
|
||
|
$self->chunk(undef);
|
||
|
|
||
|
return $self->response if defined wantarray;
|
||
|
}
|
||
|
|
||
|
sub head {
|
||
|
my $self = shift;
|
||
|
|
||
|
$self->{redirected} = {};
|
||
|
$self->{redirect} = undef;
|
||
|
|
||
|
$self->_get(1);
|
||
|
}
|
||
|
|
||
|
sub default_port { 80 }
|
||
|
|
||
|
sub send {
|
||
|
my ($self, $data) = @_;
|
||
|
my $sock = $self->{sock};
|
||
|
print $sock $data;
|
||
|
}
|
||
|
|
||
|
sub cancel {
|
||
|
my $self = shift;
|
||
|
$self->{cancelled} = 1;
|
||
|
}
|
||
|
|
||
|
sub cancelled {
|
||
|
my $self = shift;
|
||
|
$self->{cancelled};
|
||
|
}
|
||
|
|
||
|
# URL escapes the path
|
||
|
sub _escape_path {
|
||
|
my $self = shift;
|
||
|
my $path = $self->{path};
|
||
|
$path =~ s#([^\w/\$.+!*'()|,;~&=:\@-])#sprintf "%%%02X", ord $1#eg;
|
||
|
|
||
|
$path;
|
||
|
}
|
||
|
|
||
|
sub _form_data {
|
||
|
my ($self, $boundary) = @_;
|
||
|
|
||
|
my @param = $self->parameters;
|
||
|
|
||
|
my (@parts, $fh_parts);
|
||
|
for (my $i = 0; $i < @param; $i += 2) {
|
||
|
my $part = $self->new_form_part(@param[$i, $i + 1]);
|
||
|
push @parts, $part;
|
||
|
$fh_parts++ if $part->fh;
|
||
|
}
|
||
|
|
||
|
if (!$boundary) {
|
||
|
if ($fh_parts) {
|
||
|
$boundary = boundary(8); # 512 random characters; hopefully good enough
|
||
|
}
|
||
|
else {
|
||
|
$boundary = boundary();
|
||
|
my $bno = 1;
|
||
|
CHECK_BOUNDARY: {
|
||
|
for (@parts) {
|
||
|
if (index($_->content, $boundary) >= 0) {
|
||
|
# The data contains this boundary - look for a better one
|
||
|
++$bno;
|
||
|
$boundary = boundary($bno > 8 ? 8 : $bno);
|
||
|
redo CHECK_BOUNDARY;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
for (my $i = 0; $i < @parts; $i += 2) {
|
||
|
splice @parts, $i, 0, \"--$boundary";
|
||
|
}
|
||
|
push @parts, \"--$boundary--";
|
||
|
|
||
|
my $content_length = 0;
|
||
|
for (@parts) {
|
||
|
if (ref eq 'SCALAR') {
|
||
|
$content_length += length($$_) + length(CRLF);
|
||
|
}
|
||
|
else {
|
||
|
if (my $size = $_->size) {
|
||
|
$content_length += $size + length(CRLF);
|
||
|
}
|
||
|
else { # We can't determine the size
|
||
|
$content_length = 0;
|
||
|
last;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
$self->{header}->replace_header('Content-Length' => $content_length) if $content_length;
|
||
|
|
||
|
my $reading;
|
||
|
my $content = sub {
|
||
|
if ($reading) {
|
||
|
my $n = read($reading, my $buf, BUFLENGTH);
|
||
|
if ($n) {
|
||
|
$content_length -= $n if $content_length;
|
||
|
return $buf;
|
||
|
}
|
||
|
else {
|
||
|
close $reading;
|
||
|
$reading = undef;
|
||
|
$content_length -= length(CRLF) if $content_length;
|
||
|
return CRLF;
|
||
|
}
|
||
|
}
|
||
|
while () {
|
||
|
if (!@parts) {
|
||
|
defined $content_length and $content_length != 0
|
||
|
and croak "Data sent did not match calculated content-length (still have $content_length). This probably means an uploaded file changed in size during transfer.";
|
||
|
return;
|
||
|
}
|
||
|
elsif ($content_length and $content_length < 0) {
|
||
|
croak "Data sent exceeded calculated content-length. This probably means an upload file changed in size during transfer.";
|
||
|
}
|
||
|
my $p = shift @parts;
|
||
|
if (ref $p eq 'SCALAR') {
|
||
|
$$p .= CRLF;
|
||
|
while (@parts and ref $parts[0] eq 'SCALAR') {
|
||
|
$$p .= ${shift @parts} . CRLF;
|
||
|
}
|
||
|
$content_length -= length $$p if $content_length;
|
||
|
return $$p;
|
||
|
}
|
||
|
elsif (my $fh = $p->fh) {
|
||
|
my $buf = "" . $p->header;
|
||
|
my $n = read($fh, $buf, BUFLENGTH, length($buf)); # Append to $buf
|
||
|
if ($n) {
|
||
|
$reading = $fh;
|
||
|
}
|
||
|
else {
|
||
|
close $fh;
|
||
|
}
|
||
|
|
||
|
if (my $len = length $buf) {
|
||
|
$content_length -= $len if $content_length;
|
||
|
return $buf;
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
my $buf = $p->header . $p->content . CRLF;
|
||
|
$content_length -= length($buf) if $content_length;
|
||
|
return $buf;
|
||
|
}
|
||
|
}
|
||
|
};
|
||
|
|
||
|
return wantarray ? ($content, $boundary, $content_length) : $content;
|
||
|
}
|
||
|
|
||
|
# Sets up the generic headers (such as Authorization and Accept-Encoding) for
|
||
|
# any request (GET, POST, or HEAD).
|
||
|
sub _set_basic_headers {
|
||
|
my $self = shift;
|
||
|
my $header = $self->header;
|
||
|
|
||
|
if (!$header->header('Accept')) {
|
||
|
$header->header(Accept => '*/*');
|
||
|
}
|
||
|
|
||
|
if ($have_zlib and !$header->header('Accept-Encoding')) {
|
||
|
$header->header('Accept-Encoding' => ['gzip' => undef, 'deflate' => undef]);
|
||
|
}
|
||
|
|
||
|
unless ($header->header('Connection')) {
|
||
|
if ($self->{http_10}) {
|
||
|
$header->header(Connection => 'close');
|
||
|
}
|
||
|
else {
|
||
|
$header->header(Connection => 'keep-alive');
|
||
|
}
|
||
|
}
|
||
|
|
||
|
unless ($header->header('User-Agent')) {
|
||
|
$header->replace_header('User-Agent' => $self->agent);
|
||
|
}
|
||
|
|
||
|
if (defined(my $username = $self->username)) {
|
||
|
# RFC 2616
|
||
|
my $password = $self->password;
|
||
|
$password = '' if not defined $password;
|
||
|
$username =~ /[$valid_username]/
|
||
|
and croak "Invalid HTTP username: contains CTL's";
|
||
|
$username =~ /:/ and croak "Invalid HTTP username: Username cannot contain ':'";
|
||
|
my $encoded = encode_base64("$username:$password");
|
||
|
$header->replace_header(Authorization => "Basic $encoded");
|
||
|
}
|
||
|
1;
|
||
|
}
|
||
|
|
||
|
sub boundary {
|
||
|
my $c = shift or return "XyZzAYz";
|
||
|
|
||
|
my $return = "XyZzAYz";
|
||
|
for (1 .. $c ** 2) {
|
||
|
$return .= ('a' .. 'z', 'A' .. 'Z', 0 .. 9)[rand 62];
|
||
|
}
|
||
|
$return;
|
||
|
}
|
||
|
|
||
|
# Returns the header object for the current object, creating a new one if
|
||
|
# necessary. Note that this is the _request_ header - the response header is
|
||
|
# available through the header() method of the response object (see response())
|
||
|
sub header {
|
||
|
my $self = shift;
|
||
|
$self->{header} ||= $self->new_header;
|
||
|
if (@_) {
|
||
|
$self->{header}->header(@_);
|
||
|
}
|
||
|
else {
|
||
|
$self->{header};
|
||
|
}
|
||
|
}
|
||
|
|
||
|
# Attempts to read the header from the server. Note that a true argument tells
|
||
|
# the method NOT to continue if a "100 Continue" header is received - without
|
||
|
# the true argument, the 100 Continue header will be skipped.
|
||
|
sub _read_header {
|
||
|
my ($self, $ignore_100) = @_;
|
||
|
|
||
|
my $response = $self->new_response;
|
||
|
my $header = $self->new_header;
|
||
|
$self->{response} = $response;
|
||
|
$response->{header} = $header;
|
||
|
|
||
|
$self->{sock}->readline(my $status);
|
||
|
defined $status or return $self->error("Server did not send status line");
|
||
|
|
||
|
if ($self->{reused} and $status !~ /\S/) {
|
||
|
# When using keep-alive connections, there will (sometimes?) be a blank line here
|
||
|
$self->{sock}->readline($status);
|
||
|
}
|
||
|
$status =~ m|^HTTP/(1\.[10]) ([1-5]\d\d)(?: ([^\r\n]*))?|
|
||
|
or return $self->error("Server returned invalid status line: $status");
|
||
|
$response->server_version($1);
|
||
|
my $int_status = $2;
|
||
|
$response->status($int_status, $3 || '');
|
||
|
|
||
|
my ($line, $last, $last_value);
|
||
|
while ($self->{sock}->readline($line) and $line =~ /[^\015\012]/) {
|
||
|
$line =~ y/\015\012//d;
|
||
|
$line =~ s/%([0-9a-fA-F]{2})/chr hex $1/eg;
|
||
|
if ($last and $line =~ s/^[ \t]+/ /) {
|
||
|
$last_value .= $line;
|
||
|
}
|
||
|
else {
|
||
|
if ($last) {
|
||
|
$header->header($last, $last_value);
|
||
|
}
|
||
|
($last, $last_value) = split /:\s*/, $line, 2;
|
||
|
for ($last, $last_value) { s/%([0-9a-fA-F]{2})/chr hex $1/eg }
|
||
|
}
|
||
|
}
|
||
|
if ($last) {
|
||
|
$header->header($last, $last_value);
|
||
|
}
|
||
|
|
||
|
# If we received a 100 status, read the header over again
|
||
|
return $self->_read_header if $int_status == 100 and not $ignore_100;
|
||
|
1;
|
||
|
}
|
||
|
|
||
|
sub _read_body {
|
||
|
my $self = shift;
|
||
|
|
||
|
my $response = $self->response;
|
||
|
my $header = $response->header;
|
||
|
my ($chunked, $gzip, $deflate);
|
||
|
|
||
|
my @te = $header->header_words('Transfer-Encoding');
|
||
|
for (my $i = 0; $i < @te; $i += 2) {
|
||
|
if ($te[$i] eq 'chunked') {
|
||
|
$chunked++;
|
||
|
}
|
||
|
if ($have_zlib) {
|
||
|
if ($te[$i] eq 'gzip') {
|
||
|
$gzip++;
|
||
|
$deflate++;
|
||
|
}
|
||
|
elsif ($te[$i] eq 'deflate') {
|
||
|
$deflate++;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
if ($have_zlib and !$deflate) {
|
||
|
my @ce = $header->header_words('Content-Encoding');
|
||
|
for (my $i = 0; $i < @ce; $i += 2) {
|
||
|
if ($ce[$i] eq 'gzip') {
|
||
|
$gzip++;
|
||
|
$deflate++;
|
||
|
}
|
||
|
elsif ($ce[$i] eq 'deflate') {
|
||
|
$deflate++;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
if ($self->{debug}) {
|
||
|
my $encoding = join ', ', ($gzip ? 'gzipped' : ()), ($deflate && !$gzip ? 'deflated' : ()),
|
||
|
($chunked ? 'chunked' : ());
|
||
|
$encoding ||= 'normal (not chunked or compressed)';
|
||
|
$self->debug("Server headers indicate $encoding encoding");
|
||
|
}
|
||
|
|
||
|
my $body;
|
||
|
$body = '' unless $self->{chunk_code};
|
||
|
if ($chunked) {
|
||
|
$self->debug("Server sending in chunked encoding") if $self->{debug};
|
||
|
my ($bytes, $chunks, $data_length, $content_length) = (0, 0, 0, 0);
|
||
|
|
||
|
my ($gzip_buffer, $inflator, $gzip_header_removed) = ('');
|
||
|
if ($deflate) {
|
||
|
# A negative WindowBits is an "undocumented" (it is referenced only as
|
||
|
# a comment in the zlib source) zlib feature to not look for a zlib
|
||
|
# header. Only "gzip" data needs this option, since the gzip header is
|
||
|
# different - and handled seperately.
|
||
|
$inflator = inflateInit($gzip ? (-WindowBits => -MAX_WBITS()) : ());
|
||
|
}
|
||
|
|
||
|
CHUNK: while () {
|
||
|
$self->{sock}->readline(my $line)
|
||
|
or return $self->error("Could not read from server: " . $self->{sock}->error);
|
||
|
$line =~ /^([0-9A-Fa-f]+)\s*$/
|
||
|
or return $self->error("Server told about chunk encoding, but did not transfer properly (read '$line', wanted hex)");
|
||
|
$bytes = hex($1);
|
||
|
|
||
|
unless ($bytes) {
|
||
|
$self->debug("Finished reading chunked data") if $self->{debug} >= 2;
|
||
|
last;
|
||
|
}
|
||
|
|
||
|
$self->debug("Reading $bytes byte chunk") if $self->{debug} >= 2;
|
||
|
|
||
|
my $b = $bytes;
|
||
|
$chunks++;
|
||
|
if ($deflate) {
|
||
|
$self->{sock}->readblock(my $chunk, $b);
|
||
|
$data_length += length $chunk;
|
||
|
$gzip_buffer .= $chunk;
|
||
|
if ($gzip and not $gzip_header_removed) {
|
||
|
_remove_gzip_header(\$gzip_buffer) and $gzip_header_removed++;
|
||
|
}
|
||
|
my ($output, $status) = $inflator->inflate(\$gzip_buffer);
|
||
|
if ($status != Z_OK() and $status != Z_STREAM_END()) {
|
||
|
return $self->error("Decompression failed (zlib error status: $status)");
|
||
|
}
|
||
|
my $size = length($output);
|
||
|
|
||
|
$content_length += $size;
|
||
|
$self->debug("After inflation, read:\n$output") if $self->{debug} >= 3;
|
||
|
|
||
|
if ($self->{chunk_code}) {
|
||
|
my $chunk_size = $self->{chunk_size} || CHUNK_SIZE;
|
||
|
while (length($output)) {
|
||
|
my $block = \substr($output, 0, $chunk_size);
|
||
|
$self->debug("Passing block to chunk_code") if $self->{debug} >= 2;
|
||
|
$self->{chunk_code}->($block);
|
||
|
$$block = ''; # Clears the $output substring referenced above
|
||
|
if ($self->{cancelled}) {
|
||
|
$self->debug("Current operation cancelled!") if $self->{debug};
|
||
|
return 1;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
$body .= $output;
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
if ($self->{chunk_code}) {
|
||
|
my $chunk_size = $self->{chunk_size} || CHUNK_SIZE;
|
||
|
while ($b > $chunk_size) {
|
||
|
$self->debug("Reading $chunk_size bytes ($b remaining in current chunk)") if $self->{debug} >= 2;
|
||
|
$self->{sock}->readblock(my $block, $chunk_size);
|
||
|
my $size = length $block;
|
||
|
$data_length += $size;
|
||
|
$b -= $size;
|
||
|
$self->debug("Read chunk:\n$block") if $self->{debug} >= 3;
|
||
|
$self->debug("Passing chunk to chunk_code") if $self->{debug} >= 2;
|
||
|
$self->{chunk_code}->(\$block);
|
||
|
if ($self->{cancelled}) {
|
||
|
$self->debug("Current operation cancelled!") if $self->{debug};
|
||
|
return 1;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
if ($b) {
|
||
|
$self->debug("Reading $b bytes of chunk") if $self->{debug} >= 2;
|
||
|
$self->{sock}->readblock(my $block, $b);
|
||
|
$data_length += length $block;
|
||
|
$self->debug("Read chunk:\n$block") if $self->{debug} >= 3;
|
||
|
if ($self->{chunk_code}) {
|
||
|
$self->debug("Passing chunk to chunk_code") if $self->{debug} >= 2;
|
||
|
$self->{chunk_code}->(\$block);
|
||
|
if ($self->{cancelled}) {
|
||
|
$self->debug("Current operation cancelled!") if $self->{debug};
|
||
|
return 1;
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
$body .= $block;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
$self->{sock}->readline($line); # Read a CRLF after the chunk
|
||
|
$self->debug("Finished reading chunk") if $self->{debug} >= 2;
|
||
|
}
|
||
|
|
||
|
$self->debug("Read $data_length bytes " . ($deflate ? "of $content_length bytes compresses data " : '') . "in $chunks chunk" . ($chunks == 1 ? "" : "s"))
|
||
|
if $chunks and $self->{debug};
|
||
|
}
|
||
|
elsif ($deflate) {
|
||
|
my ($read, $size);
|
||
|
my $remaining;
|
||
|
if (my $content_length = $header->header('Content-Length')) {
|
||
|
$remaining = $content_length;
|
||
|
}
|
||
|
|
||
|
# A negative WindowBits is an "undocumented" (it is referenced only as
|
||
|
# a comment in the zlib source) zlib feature to not look for a zlib
|
||
|
# header. Only "gzip" data needs this option, since the gzip header is
|
||
|
# different - and handled seperately.
|
||
|
my $inflator = inflateInit($gzip ? (-WindowBits => -MAX_WBITS()) : ());
|
||
|
|
||
|
if ($self->{chunk_code}) {
|
||
|
my $header_removed;
|
||
|
my ($buffer, $data) = ('', '');
|
||
|
my $chunk_size = $self->{chunk_size} || CHUNK_SIZE;
|
||
|
while () {
|
||
|
# From my testing, a typical HTML page gzips to anywhere from
|
||
|
# 1/10 to 1/2 the size, so for these purposes we try to read
|
||
|
# and inflate about 1/2 the maximum block size at a time, in
|
||
|
# the hopes that what we grab will be close to the proper
|
||
|
# amount of data. This value is essentially arbitrary.
|
||
|
my $read_size = int($chunk_size * 0.5) || 1;
|
||
|
$read_size = $remaining if $remaining and $remaining < $chunk_size;
|
||
|
$self->debug("Reading block of $read_size compressed bytes" . ($remaining ? " ($remaining remaining)" : ""))
|
||
|
if $self->{debug} >= 2;
|
||
|
$self->{sock}->readblock(my $block, $read_size);
|
||
|
my $block_size = length($block);
|
||
|
if (defined $remaining) {
|
||
|
if (!$block_size or $block_size < $read_size) {
|
||
|
return $self->error("Could not read all content from server: " . ($self->{sock}->error || "Unknown error"));
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
if (!$block_size or $block_size < $read_size) {
|
||
|
last;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
$read += $block_size;
|
||
|
$remaining -= $block_size if defined $remaining;
|
||
|
if (not $header_removed and $gzip) {
|
||
|
$data .= $block;
|
||
|
if (_remove_gzip_header(\$data)) {
|
||
|
$header_removed = 1;
|
||
|
}
|
||
|
else {
|
||
|
if (defined $remaining and $remaining <= 0 and not length $data) {
|
||
|
return $self->error("No content after removing gzip header from gzip compressed data");
|
||
|
}
|
||
|
next;
|
||
|
}
|
||
|
$block = $data;
|
||
|
}
|
||
|
|
||
|
my ($output, $status) = $inflator->inflate(\$block);
|
||
|
if ($status != Z_OK() and $status != Z_STREAM_END()) {
|
||
|
return $self->error("Decompression failed (zlib error status: $status)");
|
||
|
}
|
||
|
$size += length($output);
|
||
|
$buffer .= $output;
|
||
|
$self->debug("After inflation, read:\n$output") if $self->{debug} >= 3;
|
||
|
while (length($buffer) > $chunk_size) {
|
||
|
my $block = \substr($buffer, 0, $chunk_size);
|
||
|
$self->debug("Passing block to chunk_code") if $self->{debug} >= 2;
|
||
|
$self->{chunk_code}->($block);
|
||
|
$$block = ''; # Clears the $buffer substring referenced above
|
||
|
if ($self->{cancelled}) {
|
||
|
$self->debug("Current operation cancelled!") if $self->{debug};
|
||
|
return 1;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
last if defined $remaining and $remaining <= 0;
|
||
|
}
|
||
|
if (length $buffer) {
|
||
|
$self->debug("Passing remaining data to chunk_code") if $self->{debug} >= 2;
|
||
|
$self->{chunk_code}->(\$buffer);
|
||
|
if ($self->{cancelled}) {
|
||
|
$self->debug("Current operation cancelled!") if $self->{debug};
|
||
|
return 1;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
if ($self->{debug} >= 2) {
|
||
|
if (defined $remaining) { $self->debug("Reading $remaining compressed bytes from server") }
|
||
|
else { $self->debug("Reading all compressed bytes from server") }
|
||
|
}
|
||
|
$self->{sock}->readblock(my $block, defined($remaining) ? $remaining : -1);
|
||
|
$read = length($block);
|
||
|
$body = Compress::Zlib::memGunzip(\$block);
|
||
|
$size = length($body);
|
||
|
|
||
|
defined $body or return $self->error("Decompression failed");
|
||
|
|
||
|
$self->debug("After inflation, read:\n$body") if $self->{debug} >= 3;
|
||
|
}
|
||
|
|
||
|
$self->debug("Read $read gziped bytes, containing $size bytes of uncompressed data")
|
||
|
if $self->{debug};
|
||
|
}
|
||
|
else {
|
||
|
my $chunk_size = $self->{chunk_size} || CHUNK_SIZE;
|
||
|
my $content_length = $header->header('Content-Length');
|
||
|
my $remaining;
|
||
|
my $read_size;
|
||
|
if ($content_length) {
|
||
|
$read_size = $chunk_size < $content_length ? $chunk_size : $content_length;
|
||
|
$remaining = $content_length;
|
||
|
}
|
||
|
elsif (defined $content_length) { # i.e. $content_length == 0
|
||
|
$self->debug("Document contains no data (headers contain Content-Length: 0)") if $self->{debug};
|
||
|
}
|
||
|
else {
|
||
|
$read_size = -1;
|
||
|
}
|
||
|
|
||
|
my $read;
|
||
|
while ($read_size) {
|
||
|
$read_size = $remaining if $remaining and $remaining < $read_size;
|
||
|
|
||
|
if ($self->{debug} >= 2) {
|
||
|
if ($read_size > 0) { $self->debug("Reading block of $read_size bytes ($remaining remaining) from server") }
|
||
|
else { $self->debug("Reading all available data from server (blocking read)") }
|
||
|
}
|
||
|
|
||
|
my $bytes_read = $self->{sock}->readblock(my $block, $read_size);
|
||
|
$read += $bytes_read;
|
||
|
$self->debug("Read body:\n$block") if $self->{debug} >= 3;
|
||
|
|
||
|
if (defined $remaining) {
|
||
|
if (!$bytes_read or $bytes_read < $read_size) {
|
||
|
return $self->error("Could not read all content from server: " . ($self->{sock}->error || "Unknown error"));
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
$self->debug("Read $bytes_read bytes from server") if $self->{debug};
|
||
|
last if not $bytes_read;
|
||
|
}
|
||
|
|
||
|
$remaining -= $bytes_read if defined $remaining;
|
||
|
|
||
|
if ($self->{chunk_code}) {
|
||
|
$self->debug("Passing data to chunk_code") if $self->{debug} >= 2;
|
||
|
$self->{chunk_code}->(\$block);
|
||
|
if ($self->{cancelled}) {
|
||
|
$self->debug("Current operation cancelled!") if $self->{debug};
|
||
|
return 1;
|
||
|
}
|
||
|
if (defined $content_length) {
|
||
|
last if $read >= $content_length;
|
||
|
}
|
||
|
elsif (defined $remaining) {
|
||
|
last if $bytes_read < $read_size;
|
||
|
}
|
||
|
else {
|
||
|
last;
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
$body .= $block;
|
||
|
|
||
|
if (defined $content_length) {
|
||
|
last if length($body) >= $content_length;
|
||
|
}
|
||
|
elsif (defined $remaining) {
|
||
|
last if $bytes_read < $read_size
|
||
|
}
|
||
|
else {
|
||
|
last;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
$response->{content} = $body;
|
||
|
|
||
|
1;
|
||
|
}
|
||
|
|
||
|
# gzip constants
|
||
|
use constants
|
||
|
MAGIC1 => 0x1f,
|
||
|
MAGIC2 => 0x8b,
|
||
|
OSCODE => 3,
|
||
|
|
||
|
FTEXT => 1,
|
||
|
FHCRC => 2,
|
||
|
FEXTRA => 4,
|
||
|
FNAME => 8,
|
||
|
FCOMMENT => 16,
|
||
|
NULL => chr(0),
|
||
|
RESERVED => 0xE0,
|
||
|
|
||
|
MIN_HDR_SIZE => 10;
|
||
|
|
||
|
# This sub returns undef if the scalar ref passed in does not contain a gzip
|
||
|
# header. If a header is found, it is removed and the value 1 is returned.
|
||
|
sub _remove_gzip_header {
|
||
|
my $gzip = shift;
|
||
|
return undef if length($$gzip) < MIN_HDR_SIZE;
|
||
|
my $substr_base = 0;
|
||
|
|
||
|
# C = unsigned char, V = unsigned long
|
||
|
my ($magic1, $magic2, $method, $flags, $time, $xflags, $oscode) = unpack('CCCCVCC', $$gzip);
|
||
|
|
||
|
return undef unless
|
||
|
$magic1 == MAGIC1 and
|
||
|
$magic2 == MAGIC2 and
|
||
|
$method == Z_DEFLATED() and
|
||
|
!($flags & RESERVED);
|
||
|
|
||
|
$substr_base += MIN_HDR_SIZE;
|
||
|
|
||
|
# FEXTRA defines an extra field. We skip it.
|
||
|
if ($flags & FEXTRA) {
|
||
|
return undef if length($$gzip) - $substr_base < 2;
|
||
|
|
||
|
my ($extra_len) = unpack("x${substr_base}v", $$gzip); # v = unsigned short
|
||
|
$extra_len += 2;
|
||
|
return undef if length($$gzip) - $substr_base < $extra_len;
|
||
|
|
||
|
$substr_base += $extra_len;
|
||
|
}
|
||
|
|
||
|
# FNAME stores the original file name - we skip it as well.
|
||
|
if ($flags & FNAME) {
|
||
|
my $name_end = index($$gzip, NULL, $substr_base);
|
||
|
return undef if $name_end < 0;
|
||
|
$substr_base += $name_end + 1;
|
||
|
}
|
||
|
|
||
|
# FCOMMENT means the gzip header contains a comment
|
||
|
if ($flags & FCOMMENT) {
|
||
|
my $comment_end = index($$gzip, NULL, $substr_base);
|
||
|
return undef if $comment_end < 0;
|
||
|
$substr_base += $comment_end + 1;
|
||
|
}
|
||
|
|
||
|
# FHCRC stores the CRC for the gzip file, which we can't use
|
||
|
if ($flags & FHCRC) {
|
||
|
return undef if length($$gzip) - $substr_base < 2 ;
|
||
|
$substr_base += 2;
|
||
|
}
|
||
|
|
||
|
substr($$gzip, 0, $substr_base) = '';
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
sub response { $_[0]->{response} }
|
||
|
|
||
|
sub protocol {
|
||
|
my $self = shift;
|
||
|
|
||
|
if (@_ and $_[0] ne $self->{protocol}) {
|
||
|
croak "You cannot change protocols once a protocol has been set";
|
||
|
}
|
||
|
|
||
|
return $self->{protocol};
|
||
|
}
|
||
|
|
||
|
# Sets HTTP/1.0 mode. By default, all connections use HTTP/1.1
|
||
|
sub http_10 {
|
||
|
my $self = shift;
|
||
|
if (@_) {
|
||
|
if ($self->{sock}) {
|
||
|
croak "Cannot change HTTP connection version once connected";
|
||
|
}
|
||
|
$self->{http_10} = shift && 1 || undef; # Force a value of 1 or undef
|
||
|
}
|
||
|
$self->{http_10};
|
||
|
}
|
||
|
|
||
|
sub no_redirect {
|
||
|
my $self = shift;
|
||
|
if (@_) {
|
||
|
$self->{no_redirect} = shift && 1 || undef;
|
||
|
}
|
||
|
$self->{no_redirect};
|
||
|
}
|
||
|
|
||
|
sub redirects {
|
||
|
my $self = shift;
|
||
|
return $self->{redirect} ? @{$self->{redirect}} : undef;
|
||
|
}
|
||
|
|
||
|
sub _connect {
|
||
|
my $self = shift;
|
||
|
|
||
|
my $reuse = 0;
|
||
|
my ($connect_time, $r, $rheader, $close, $sclose, $timeout); # Here for debugging purposes
|
||
|
if (not $self->{http_10} and $self->{sock} and $connect_time = $self->{connected}) {
|
||
|
$self->debug("Check existing connection for keep-alive appropriateness") if $self->{debug} >= 2;
|
||
|
if ($r = $self->response
|
||
|
and not ($close = $self->header->contains(Connection => 'close'))
|
||
|
and $r->server_version eq '1.1'
|
||
|
and $rheader = $r->header
|
||
|
and $self->{host} eq $self->{connected_host} and $self->{port} eq $self->{connected_port}) {
|
||
|
|
||
|
unless ($sclose = $rheader->contains(Connection => 'close')) {
|
||
|
if ($rheader->header('Keep-Alive')) {
|
||
|
# Apache is nice enough to send a header such as: Keep-Alive: timeout=15, max=100
|
||
|
# IIS, on the other hand, doesn't let us know when it's going to time out, nor is
|
||
|
# it consistent in the amount of time it gives before timing out.
|
||
|
my @p = $rheader->header_words("Keep-Alive");
|
||
|
for (my $i = 0; $i < @p; $i += 2) {
|
||
|
$timeout = $p[$i + 1] if $p[$i] eq 'timeout';
|
||
|
}
|
||
|
}
|
||
|
$timeout ||= 10; # Completely arbitary value - Apache's default is 15, IIS takes some
|
||
|
# arbitrary number of seconds to timeout, but it seems to be at least a minute.
|
||
|
|
||
|
# If the last connection was made within the last ($timeout) seconds, then
|
||
|
# we can attempt to reuse the connection.
|
||
|
$reuse = 1 if $connect_time > time - $timeout;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if ($reuse) {
|
||
|
$self->debug("Reusing established Keep-Alive connection") if $self->{debug};
|
||
|
$self->{connected} = time;
|
||
|
$self->{reused} = 1;
|
||
|
}
|
||
|
else {
|
||
|
if ($self->{sock}) {
|
||
|
$self->debug("Connection not persisting") if $self->{debug};
|
||
|
|
||
|
if ($self->{debug} >= 2) {
|
||
|
if ($self->{http_10}) { $self->debug("... The 'http_10' option has been enabled") }
|
||
|
elsif (!$self->response) { $self->debug("... No response object from last request") }
|
||
|
elsif ($close) { $self->debug("... Current header specifies 'Connection: close' for next request; honouring for current request") }
|
||
|
elsif ($r->server_version ne '1.1') { $self->debug("... Server HTTP version (" . ($r->server_version) . ") is not 1.1") }
|
||
|
elsif (!$rheader) { $self->debug("... No response header from last request") }
|
||
|
elsif ($self->{host} ne $self->{connected_host}) { $self->debug("... Host has changed since last request") }
|
||
|
elsif ($self->{port} ne $self->{connected_port}) { $self->debug("... Server port has changed since last request") }
|
||
|
elsif ($sclose) { $self->debug("... Server indicated 'Connection: close' in last response header") }
|
||
|
elsif ($connect_time > time - $timeout) { $self->debug("... Too long as passed since last request") }
|
||
|
else { $self->debug("Unknown reason [bug]") } # Shouldn't happen - hence "[bug]"
|
||
|
}
|
||
|
}
|
||
|
|
||
|
my $conn_timeout = $self->connection_timeout;
|
||
|
# If a connection is already established, reassigning to $self->{sock} will free
|
||
|
# the old one, thereby implicitly closing it.
|
||
|
$self->{sock} = GT::Socket::Client->open(
|
||
|
host => $self->{host},
|
||
|
port => $self->{port},
|
||
|
($conn_timeout ? (timeout => $conn_timeout) : ()),
|
||
|
debug => $self->{debug},
|
||
|
(UNIVERSAL::isa($self, 'GT::WWW::https') ? (ssl => 1) : ())
|
||
|
);
|
||
|
$self->{sock} or return $self->error("Could not connect to $self->{host}: " . GT::Socket::Client->error);
|
||
|
|
||
|
$self->{connected} = time;
|
||
|
$self->{connected_host} = $self->{host};
|
||
|
$self->{connected_port} = $self->{port};
|
||
|
delete $self->{reused};
|
||
|
}
|
||
|
|
||
|
return 1;
|
||
|
}
|
||
|
|
||
|
# Not really "real" base64 encoding - this doesn't break into lines of 76
|
||
|
# characters, and so doesn't have an EOL character either.
|
||
|
sub encode_base64 {
|
||
|
my $res = '';
|
||
|
pos($_[0]) = 0; # In case something has previously adjusted pos
|
||
|
while ($_[0] =~ /(.{1,45})/gs) {
|
||
|
$res .= substr(pack(u => $1), 1, -1);
|
||
|
}
|
||
|
$res =~ tr|` -_|AA-Za-z0-9+/|;
|
||
|
|
||
|
my $padding = (3 - length($_[0]) % 3) % 3;
|
||
|
$res =~ s/.{$padding}$/'=' x $padding/e if $padding;
|
||
|
$res;
|
||
|
}
|
||
|
|
||
|
|
||
|
sub new_header { shift; GT::WWW::http::Header->new(@_) }
|
||
|
sub new_response { shift; GT::WWW::http::Response->new(@_) }
|
||
|
sub new_form_part { shift; GT::WWW::http::Part->new(@_) }
|
||
|
|
||
|
package GT::WWW::http::Part;
|
||
|
|
||
|
use GT::Socket::Client qw/:crlf/;
|
||
|
use Carp;
|
||
|
|
||
|
sub new {
|
||
|
my ($class, $k, $v) = @_;
|
||
|
my $self = {};
|
||
|
bless $self, ref($class) || $class;
|
||
|
my $h = $self->{header} = $self->new_header;
|
||
|
$h->_separator(';');
|
||
|
|
||
|
if (!ref $v) {
|
||
|
$self->{content} = $v;
|
||
|
$h->_force_quotes;
|
||
|
$h->header(
|
||
|
'Content-Disposition' => ['form-data' => undef, name => $k]
|
||
|
);
|
||
|
$h->_unforce_quotes;
|
||
|
}
|
||
|
elsif (ref $v and UNIVERSAL::isa($v, 'GT::WWW::File')) {
|
||
|
$h->_force_quotes;
|
||
|
$h->header(
|
||
|
'Content-Disposition' => ['form-data' => undef, name => $k, filename => $v->filename]
|
||
|
);
|
||
|
$h->_unforce_quotes;
|
||
|
|
||
|
if ($h->header('Content')) {
|
||
|
$self->{content} = $h->header('Content');
|
||
|
$h->delete_header('Content');
|
||
|
}
|
||
|
else {
|
||
|
my $fh = $v->fh;
|
||
|
|
||
|
my $size;
|
||
|
unless (-f $fh and $size = -s _) {
|
||
|
# If we can't get the proper size (for example, /proc/cpuinfo),
|
||
|
# or if this filehandle isn't a filehandle (socket, block
|
||
|
# device, etc.) then we need to slurp the file.
|
||
|
local $/;
|
||
|
$self->{content} = <$fh>;
|
||
|
close $fh;
|
||
|
$size = length($self->{content});
|
||
|
}
|
||
|
else {
|
||
|
$self->{content} = $fh;
|
||
|
}
|
||
|
|
||
|
require GT::MIMETypes;
|
||
|
$h->header(
|
||
|
'Content-Type' => scalar GT::MIMETypes->guess_type($v->filename),
|
||
|
'Content-Length' => $size
|
||
|
);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
$self;
|
||
|
}
|
||
|
|
||
|
# Returns the size of the part, in bytes, _IF_ it can be retrieved. Cases where
|
||
|
# it can't be retrieved include where you passed a file to new() which is not a
|
||
|
# regular file (e.g. /dev/audio) or a dynamic file (e.g. /proc/mounts), which
|
||
|
# reports a size of 0. (This also means that an empty file will cause the size
|
||
|
# to be unavailable). Note that the size includes the size of the header.
|
||
|
sub size {
|
||
|
my $self = shift;
|
||
|
if (ref $self->{content} and -f $self->{content} and my $size = -s _) {
|
||
|
my $header = $self->header;
|
||
|
return length("$header") + $size;
|
||
|
}
|
||
|
elsif (!ref $self->{content}) {
|
||
|
my $header = $self->header;
|
||
|
return length("$header") + length($self->{content});
|
||
|
}
|
||
|
return undef;
|
||
|
}
|
||
|
|
||
|
sub fh {
|
||
|
my $self = shift;
|
||
|
if (ref $self->{content} eq 'GLOB') {
|
||
|
return $self->{content};
|
||
|
}
|
||
|
else {
|
||
|
return undef;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
sub header {
|
||
|
my $self = shift;
|
||
|
$self->{header};
|
||
|
}
|
||
|
|
||
|
sub content {
|
||
|
my $self = shift;
|
||
|
$self->{content};
|
||
|
}
|
||
|
|
||
|
sub new_header { shift; GT::WWW::http::Header->new(@_) }
|
||
|
|
||
|
1;
|
||
|
|
||
|
__END__
|
||
|
|
||
|
=head1 NAME
|
||
|
|
||
|
GT::WWW::http - HTTP interface for GT::WWW
|
||
|
|
||
|
=head1 SYNOPSIS
|
||
|
|
||
|
use GT::WWW;
|
||
|
my $www = GT::WWW->new();
|
||
|
$www->protocol('http');
|
||
|
# any valid GT::WWW methods here
|
||
|
# ...
|
||
|
my $header = $www->header;
|
||
|
$header->header("Some-Http-Header" => $value);
|
||
|
$header->delete_header("Some-Other-Http-Header");
|
||
|
|
||
|
my $response = $www->get() or die "Could not connect to server: " . $www->error;
|
||
|
|
||
|
my $status = $response->status;
|
||
|
my $response_code = int $status; # For example, 200, 404, 500, etc.
|
||
|
my $response_str = "$status"; # For example, 'OK', 'Not Found', 'Internal Server Error', etc.
|
||
|
|
||
|
if ($status) {
|
||
|
# This will be true if the status code is a successful one - in other
|
||
|
# words, true for 2xx responses, false for others
|
||
|
print "Response successful. Content:\n$response\n";
|
||
|
}
|
||
|
else {
|
||
|
die "Request was not successful ($response_code $response_str)\n";
|
||
|
}
|
||
|
|
||
|
=head1 DESCRIPTION
|
||
|
|
||
|
GT::WWW::http handles HTTP connections for GT::WWW. It uses some overloading
|
||
|
to assist in the ease of use without sacrificing functionality.
|
||
|
|
||
|
This document does not cover the basics of a GT::WWW object; those are covered
|
||
|
by L<GT::WWW>.
|
||
|
|
||
|
=head1 METHODS
|
||
|
|
||
|
=head2 header
|
||
|
|
||
|
This method returns the GT::WWW::http::Header object that will be (or has been)
|
||
|
sent to the HTTP server. See L<GT::WWW::http::Header> for information on using
|
||
|
and manipulating a header object.
|
||
|
|
||
|
Note that you can add headers without first getting a header object by simply
|
||
|
specifying the headers as arguments to header(). Normally, you would call:
|
||
|
|
||
|
$www->header->header('X-Foo' => 'bar');
|
||
|
|
||
|
This shortcut allows for:
|
||
|
|
||
|
$www->header('X-Foo' => 'bar');
|
||
|
|
||
|
Check L<GT::WWW::http::Header> for valid arguments to the header() method.
|
||
|
|
||
|
=head2 http_10
|
||
|
|
||
|
This method can be used I<before> initiating a request on the object to force
|
||
|
HTTP/1.0 communication with the HTTP server. By default HTTP/1.1 connections
|
||
|
are used. Note that HTTP/1.1 is strongly recommended as this module supports
|
||
|
keep-alive connections only when using HTTP/1.1. To force HTTP/1.0
|
||
|
communication, pass a true value to this method, or a false value to use the
|
||
|
default HTTP/1.1 connections. Returns true if HTTP/1.0 connections will be
|
||
|
used.
|
||
|
|
||
|
=head2 strict
|
||
|
|
||
|
This works as described in GT::WWW. Specifically, in addition to the loose
|
||
|
query string restrictions, this allows relative URL Location: redirects
|
||
|
(HTTP/1.1 specifically states that Location: redirects MUST be absolute).
|
||
|
|
||
|
=head2 no_redirect
|
||
|
|
||
|
This method is used before a request to indicate that automatic, seemless
|
||
|
redirection should B<not> take place. By default, when a server responds with
|
||
|
an acceptable, properly-formed 3xx response allowing a redirection, this module
|
||
|
will automatically perform the redirection, unless this option has been
|
||
|
enabled. To enable, call this method with a true value, or to disable, a false
|
||
|
value. Returns true if automatic redirection is enabled.
|
||
|
|
||
|
Note that redirections will only be performed on GET requests.
|
||
|
|
||
|
=head2 redirects
|
||
|
|
||
|
If redirections are enabled (i.e. the no_redirect option has not been turned
|
||
|
on), you can call the redirects() method to get a list of response objects
|
||
|
created while performing redirections. Typically this will be just one, but
|
||
|
more are possible.
|
||
|
|
||
|
=head2 response
|
||
|
|
||
|
Returns the response object for the last request. When automatic redirection
|
||
|
is enabled, this will be the response object for the final request. The
|
||
|
response object can be used is multiple ways, which are described below, in the
|
||
|
L</"RETURN VALUES"> section, and in L<GT::WWW::http::Response>.
|
||
|
|
||
|
=head2 cancel
|
||
|
|
||
|
This works as described in L<GT::WWW/cancel>, with one exception: if cancelling a
|
||
|
request immediately before a redirect takes place, only the current request is
|
||
|
cancelled - the redirect still occurs. Note that cancelling is likely to be a
|
||
|
resource hit in such a case because the connection cannot be reused and a new
|
||
|
one must be established - typically, to the same server.
|
||
|
|
||
|
=head1 RETURN VALUES
|
||
|
|
||
|
The return values of the L<C<get()>|GT::WWW/get>, L<C<head()>|GT::WWW/head>,
|
||
|
and L<C<post()>|GT::WWW/post> methods are simply the response object for the
|
||
|
request, which can also be obtained by calling the L<C<response()>|/response>
|
||
|
method after completing the request.
|
||
|
|
||
|
The full documentation for the response object is covered in
|
||
|
L<GT::WWW::http::Response>, however the below description is provided for a
|
||
|
brief overview. The following examples assume that "C<$response>" is an object
|
||
|
that has been obtained by calling get(), head(), post(), or response().
|
||
|
|
||
|
=head2 Status
|
||
|
|
||
|
The status of the request is available via the ->status method of the response
|
||
|
object. It is made up of three pieces of data - status code, status string,
|
||
|
and success.
|
||
|
|
||
|
To get the status code (e.g. 500, 200, etc.), simply use the status as a number:
|
||
|
|
||
|
my $status = int($response->status);
|
||
|
|
||
|
To get the status string (e.g. "500 Internal Server Error", "200 OK"), use the
|
||
|
status as a string:
|
||
|
|
||
|
my $status = "" . $response->status;
|
||
|
|
||
|
And finally, to get the success of the request, simply use status in boolean
|
||
|
context:
|
||
|
|
||
|
if ($response->status) {
|
||
|
|
||
|
Success for HTTP is defined by any 200-level response status code. A request
|
||
|
that returns "200 OK" will be pass the above if statement, while a request that
|
||
|
returned "500 Internal Server Error" will fail.
|
||
|
|
||
|
=head2 Content
|
||
|
|
||
|
The content of the last request is available by simply using the response
|
||
|
object as a string:
|
||
|
|
||
|
my $content = "$response";
|
||
|
|
||
|
You should take note, however, that if you are using the
|
||
|
L<C<chunk()>|GT::WWW/chunk> method no content will be available in this way.
|
||
|
|
||
|
Also note that the response object is an object, not a string, so anything
|
||
|
beyond basic string comparison/concatenation should not occur on the response
|
||
|
object itself. See L<GT::WWW::http::Response/CAVEATS> for more details.
|
||
|
|
||
|
=head2 Headers
|
||
|
|
||
|
The header() method of the response object returns a GT::WWW::http::Header
|
||
|
object which gives you easy access to the headers returned by the server with
|
||
|
the request.
|
||
|
|
||
|
As a special shortcut, calling header() with arguments will call the
|
||
|
L<header()|GT::WWW::http::Header/header> method of the header object with the
|
||
|
arguments provided. This allows you to optionally change this:
|
||
|
|
||
|
my $location = $response->header->header('Location');
|
||
|
|
||
|
into the shorter and clearer:
|
||
|
|
||
|
my $location = $response->header('Location');
|
||
|
|
||
|
Calling header() without arguments returns the header object for the response.
|
||
|
|
||
|
=head1 SEE ALSO
|
||
|
|
||
|
L<GT::WWW>
|
||
|
L<GT::WWW::http::Response>
|
||
|
L<GT::WWW::http::Header>
|
||
|
RFC 2616: L<http://www.ietf.org/rfc/rfc2616.txt>
|
||
|
|
||
|
=head1 MAINTAINER
|
||
|
|
||
|
Jason Rhinelander
|
||
|
|
||
|
=head1 COPYRIGHT
|
||
|
|
||
|
Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
|
||
|
http://www.gossamer-threads.com/
|
||
|
|
||
|
=head1 VERSION
|
||
|
|
||
|
Revision: $Id: http.pm,v 1.31 2005/04/08 19:20:00 jagerman Exp $
|
||
|
|
||
|
=cut
|