discourse-legacysite-perl/site/glist/lib/GT/WWW/http/Header.pm
2024-06-17 21:49:12 +10:00

650 lines
20 KiB
Perl

# ====================================================================
# Gossamer Threads Module Library - http://gossamer-threads.com/
#
# GT::WWW::http::Header
# Author: Jason Rhinelander
# CVS Info :
# $Id: Header.pm,v 1.8 2004/02/17 01:33:08 jagerman Exp $
#
# Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
# ====================================================================
#
# Description:
# Header object for GT::WWW::http request/response headers.
#
package GT::WWW::http::Header;
use strict;
use Carp;
use GT::Socket::Client qw/CRLF/;
use overload
'""' => \&format_headers,
bool => \&boolean;
my $ctls = '\x00-\x1f\x7f'; # Control characters (CTL in HTTP 1.1 RFC 2616)
my $ctls_without_tab = '\x00-\x08\x0a-\x1f\x7f';
my $separators = '()<>@,;:\\\\"/\[\]?={} \t'; # Separators
my $token = "[^$ctls$separators]"; # HTTP "token" (RFC 2616)
my $quoted_string = qq{"((?:\\\\.|[^$ctls_without_tab"])*)"}; # HTTP 1.1 quoted-string.
my %Private;
sub new {
my $class = shift;
$class = ref $class if ref $class;
my $self = [];
bless $self, $class;
}
sub boolean { 1 } # So you can you do things like: $header or die
# Adds one or more headers. Takes a list of headers => value pairs.
# Without arguments, returns a list of all header names.
# With just one argument, returns all value(s) for that header (case-
# insensitive).
# When setting headers, you can pass in an array reference for the header
# value. The array will be passed as a list to join_words, and the return used
# as the header value.
# Sets a _join_words separator to something other than , - typically ;
sub _separator {
my ($self, $sep) = @_;
$Private{$self}->{separator} = $sep if $sep;
}
# Forces _join_words to put "quotes" around values. You should call this, add
# the header that needs the quotes, then call ->_unforce_quotes;.
sub _force_quotes {
my $self = shift;
$Private{$self}->{force_quotes} = 1;
}
sub _unforce_quotes {
my $self = shift;
$Private{$self}->{force_quotes} = 0;
}
sub header {
my $self = shift;
if (@_ == 1) {
# Requesting a header, ie. $obj->header("Content-Type")
my $header = lc shift;
my @return;
for (my $i = 0; $i < @$self; $i += 2) {
if (lc $self->[$i] eq $header) {
push @return, $self->[$i + 1];
last unless wantarray;
}
}
return wantarray ? @return : $return[0];
}
elsif (@_) {
@_ % 2 and croak "Invalid parameters to header: Odd number of elements passed to header()";
while (@_) {
my ($k, $v) = splice @_, 0, 2;
if (ref $v eq 'ARRAY') {
$v = $self->join_words(@$v);
}
push @$self, $k, $v;
}
return 1;
}
else {
my @return;
for (my $i = 0; $i < @$self; $i++) {
push @return, $self->[$i];
}
return @return;
}
}
sub header_words {
my ($self, $header) = @_;
$header or croak "Usage: \$header->header_words(HEADER_NAME)";
my @result;
for (my $i = 0; $i < @$self; $i += 2) {
if (lc $self->[$i] eq lc $header) {
push @result, $self->split_words($self->[$i + 1]);
}
}
return @result;
}
sub split_words {
shift if $_[0] and UNIVERSAL::isa($_[0], __PACKAGE__);
my $str = shift or return ();
my @result;
# Pretend $str is: video/x-mng,image/png, foo=bar, image/gif;q=0.3,asdf/zxcv="y,uc;k";q="0.2";blah="a;b,c",*/*;q=0.1
while (length $str) {
if ($str =~ s/^\s*([^$ctls\s=,;]+)\s*//) { # parameter 'token' or 'attribute'
push @result, $1;
my @val;
# The goal here is to get this array containing (given the above example) undef for
# "video/x-mng", "bar" for "foo", [undef, "q", "0.3"] for "image/gif",
# ["y,uc;k", "q", "0.2", "blah", "a;b,c"] for "asdf/zxcv".
# First, handle an = clause, such as '=bar', or '="y,uc;k"'
if ($str =~ s/^=\s*$quoted_string//) { # quoted string (e.g. "y,uc;k")
(my $val = $1) =~ s/\\(.)/$1/g;
push @val, $val;
}
elsif ($str =~ s/^=\s*([^$ctls\s;,]*)//) { # some unquoted value (e.g. bar)
push @val, $1;
}
else {
push @val, undef;
}
# Now look for continuing values (e.g. ;q="0.2";blah="a;b,c")
while ($str =~ s/^;([^$ctls\s=,;]+)\s*//) {
push @val, $1;
# Look for an = clause, such as ="a;b,c"
if ($str =~ s/^=\s*$quoted_string//) { # quoted string (e.g. "a;b,c")
(my $val = $1) =~ s/\\(.)/$1/g;
push @val, $val;
}
elsif ($str =~ s/^=\s*([^$ctls\s;,]*)//) { # some unquoted value (e.g. bar)
push @val, $1;
}
else {
push @val, undef;
}
}
push @result, @val == 1 ? $val[0] : \@val;
}
elsif ($str !~ s/^\s*[,;\s]//) {
local $" = "|";
die "Invalid header encountered: '$str' (Found \@result=(@result))";
}
}
@result;
}
# Takes a header and header word, and returns true if the header word is
# present in the header. For example,
# $header->contains(Expect => '100-continue')
# will return true for the header:
# Expect: foo=bar, 100-continue, bar=foo
sub contains {
my ($self, $header, $word) = @_;
$header and $word or croak 'Usage: $header->contains(Header => Header_Word)';
my @words = $self->header_words($header);
for (my $i = 0; $i < @words; $i += 2) {
if ($words[$i] eq $word) {
return 1;
}
}
return undef;
}
sub join_words {
my $self;
$self = shift if $_[0] and UNIVERSAL::isa($_[0], __PACKAGE__);
my @words = @_;
my @encoded;
for (my $i = 0; $i < @words; $i += 2) {
my ($k, $v) = @words[$i, $i + 1];
my @pairs = ($k, ref $v eq 'ARRAY' ? @$v : $v);
@pairs % 2 and croak "Invalid composite value passed in for word '$k': Even number of elements in array ref";
my @str;
while (@pairs) {
my ($word, $value) = splice @pairs, 0, 2;
$word =~ /^[^$ctls\s=;,]+$/
or croak "Unable to join: word contains invalid characters: '$word'";
my $str = $word;
if (defined $value) {
$value =~ /[$ctls_without_tab]/
and croak "Unable to join: word value for word '$word' contains control characters: '$value'";
$str .= '=';
if ((not $self or not $Private{$self}->{force_quotes}) and $value =~ /^$token+$/) {
# If it only contains "token" characters, we don't need to quote it
$str .= $value;
}
else {
$value =~ s/([\\"])/\\$1/g;
$str .= qq'"$value"';
}
}
push @str, $str;
}
push @encoded, join ';', @str;
}
return join "$Private{$self}->{separator} ", @encoded
if $self and $Private{$self}->{separator};
return join ', ', @encoded;
}
# Deletes a word from a header's value. If the word is present more than once,
# all forms are removed. Returned is, in scalar context, an integer indicating
# how many headers were removed (0 for no words (or no header) found). In list
# context, you get a list of all the values removed, or undef for valueless
# words.
sub delete_header_word {
my ($self, $header, $word) = @_;
my @ret;
$header and $word or croak 'Usage: $header->delete_header_word(HEADER, WORD)';
for (my $i = 0; $i < @$self; $i += 2) {
if (lc $self->[$i] eq lc $header) {
my @words = $self->split_words($self->[$i + 1]);
my $found;
for (my $j = 0; $j < @words; $j += 2) {
if ($words[$j] eq $word) {
$found++;
push @ret, $words[$j + 1];
splice @words, $j, 2;
$j -= 2;
}
}
if ($found and @words) {
$self->[$i + 1] = $self->join_words(@words);
}
elsif ($found) { # This header contains only the header word
splice @$self, $i, 2;
$i -= 2;
}
}
}
@ret; # If the sub is called in scalar context, so is this
}
# Just like header(), but first deletes the headers to be added. Hence,
# $obj->replace_header($obj->header) should be a no-op.
sub replace_header {
my $self = shift;
croak "Invalid parameters to replace_header: \$obj->replace_header(KEY => VALUE[, KEY => VALUE, ...]);"
if !@_ or @_ % 2;
my %headers;
for (my $i = 0; $i < @_; $i += 2) {
$headers{$_[$i]}++;
}
$self->delete_header(keys %headers);
$self->header(@_);
}
sub format_headers {
my $self = shift;
return '' if !@$self;
my $return = '';
for (my $i = 0; $i < @$self; $i += 2) {
my ($key, $value) = @$self[$i, $i + 1];
# Valid characters from HTTP/1.1 RFC, section 4.2 (page 32)
$key =~ s|([$ctls$separators()<>@,;:\\"/\[\]?={} \t])|sprintf "%%%02X", ord $1|eg;
$value =~ s|([$ctls])|sprintf "%%%02X", ord $1|eg;
$return .= "$key: $value" . CRLF;
}
$return .= CRLF;
return $return;
}
# Clears all headers set for the current object.
sub clear_headers {
my $self = shift;
$#$self = -1;
return;
}
# Deletes one or more headers. Takes a list of headers to remove.
sub delete_header {
my ($self, @headers) = @_;
return 0 unless @$self;
my $headers = join "|", map quotemeta, @headers;
my $found;
for (my $i = 0; $i < @$self; $i += 2) {
if ($self->[$i] =~ /^(?:$headers)$/i) {
splice @$self, $i, 2;
$i -= 2;
$found++;
}
}
return $found;
}
DESTROY {
my $self = shift;
delete $Private{$self};
1;
}
1;
__END__
=head1 NAME
GT::WWW::http::Header - Module for GT::WWW::http request/response headers.
=head1 SYNOPSIS
Typically:
# Assuming $www is a GT::WWW::http object
my $request_header = $www->header;
# Set a header:
$request_header->header('Some-Http-Header' => 'Header value');
# After making a request:
my $response_header = $www->response->header;
# -- or --
my $response_header = $response->header; # $response is the return of, e.g. $www->get
Much more advanced headers can be set and determined, using the various methods
available as described below.
=head1 DESCRIPTION
This module provides an easy to use yet powerful header retrieval/manipulation
object suitable for most HTTP headers.
=head1 METHODS
First, a note about the methods described which add/change/delete headers: such
methods should only be called on a request header, and only before making a
request. Although nothing prevents you from making changes to the request
header after having made the request, or from changing the headers of a
response header object, such behaviour should be considered very bad practise
and is B<strongly> discouraged.
=head2 header
This is the most commonly used method as it is used both to add and retrieve
headers, depending on its usage. The examples below assume the following
header:
Date: Sun, 12 Jan 2003 08:21:21 GMT
Server: Apache
Keep-Alive: timeout=15, max=100
Connection: Keep-Alive
Content-Type: text/html
Content-Encoding: gzip
Content-Length: 3215
X-Foo: bar1
X-Foo: bar2, bar3
With no arguments, a list of all the header names is returned. Given the
example, the following list would be returned:
('Date', 'Server', 'Keep-Alive', 'Connection', 'Content-Type', 'Content-Encoding', 'Content-Length', 'X-Foo', 'X-Foo')
With a single argument, a list of value(s) for headers of that name are
returned. In scalar context, only the first value is returned. In list
context, a list of all values is returned. Note that the header named passed
in is case-insensitive.
my $server = $header->header('server'); # returns 'Apache'
my $foo = $header->header('X-Foo'); # returns 'bar1'
my @foo = $header->header('x-Foo'); # returns ('bar1', 'bar2, bar3')
Finally, when more than one argument is provided, header values are set. At
its simplest level, it takes a list of key => value pairs (NOT a hash, since
duplicate keys are possible) of headers to set. So, to set the headers
'Server' and 'Content-Length' above at once, you could call:
$header->header(Server => 'Apache', 'Content-Length' => 3215);
Or, if you prefer:
$header->header(Server => 'Apache');
$header->header('Content-Length' => 3215);
Note that the order in which headers are added is preserved, for times when the
order of headers is important.
B<WARNING>: Before reading the below information, you should first know that it
describes advanced usage of the header() method and requires have a grasp of
the intricacies of HTTP headers; the following is _not_ required knowledge for
typical GT::WWW use.
Consider the above Keep-Alive header an example. Instead of specifying:
$header->header('Keep-Alive' => 'timeout=15, max=100');
you could alternately write it as:
$header->header('Keep-Alive' => [timeout => 15, max => 100]);
This allows you a more pragmatic approach when you already have some sort of
data structure of the header options. You can go a step further with this, by
specifying C<undef> as the value:
# Set the second X-Foo header in the example:
$header->header('X-Foo' => [bar2 => undef, bar3 => undef]);
header() also allows you to set values such as:
image/gif;q=0.2
As can be seen in this example:
Accept: image/png,image/jpeg,image/gif;q=0.2,*/*;q=0.1
To do so, specify the suboption value as another array reference. The first
element of the array reference is usually undef, while the remaining are the
k=v pairs in the segment. So, in the above header, the 'image/gif;q=0.2' section
would be specified as:
'image/gif' => [undef, q => 0.2]
(If a segment such as "foo=bar;bar=foo" is ever needed, the C<undef> would be
changed to C<"bar">.)
So, piecing it all together, the Accept header shown above could be specified
like this:
$header->header(
Accept => [
'image/png' => undef,
'image/jpeg' => undef,
'image/gif' => [undef, q => 0.2],
'*/*' => [undef, q => 0.1]
]
);
=head2 header_words
When you need to see it a header value contains a particular "word", this
method is the one to use. As an example, consider this header:
X-Foo: bar, bar2, bar3
In order to determine whether or not "bar2" has been specified as an X-Foo
value, you could attempt some sort of regex - or you could just call this
method. The return value splits up the header in such a way as to be useful to
determine the exact information contained within the header.
The method takes a case-insensitive header name, just like the single-argument
form of header().
A even-numbered hash-I<like> list is always returned - though each element of
that list depends on the content of the header. First of all, if the header
specified does not exist, you'll get an empty list back.
Assuming that the header does exist, it will first be broken up by C<,>.
The even-indexed (0, 2, 4, ...) elements of the list are the keys, while the
odd numbered elements are the values associated with those keys - or undef if
there is no value (as above; an example with values is shown below).
So, using the above X-Foo header example, calling this method with C<'X-Foo'>
as an argument would give you back the list:
(bar => undef, bar2 => undef, bar3 => undef)
Getting a little more complicated, consider the following header:
X-Foo: bar, bar2=foo, bar3
Because of the "=foo" part, the list returned would now be:
(bar => undef, bar2 => "foo", bar3 => undef)
Quoting of values is also permitted, so the following would be parsed correctly
with C<'1;2,3=4"5\6'> being the value of bar2:
X-Foo: bar, bar2="1;2,3=4\"5\\6", bar3
Getting more complicated, this method also handles complex values containing
more than one piece of information. A good example of this is in content type
weighting used by most browsers. As a real life example (generated by
the Phoenix web browser):
Accept: video/x-mng,image/png,image/jpeg,image/gif;q=0.2,*/*;q=0.1
Working that into the X-Foo example, consider this header:
X-Foo: bar, bar2=foo, bar3;foo1=24;foo2=10
In this case, the value for bar3 will become an array reference to handle the
multiple pieces of information in the third part:
(bar => undef, bar2 => "foo", bar3 => [undef, foo1 => 24, foo2 => 10])
(If you've read the advanced section of the L<C<header()>|/header>
documentation, and this looks familiar, you're right - the return value of this
function, if put in an array reference, is completely compatible with a
header() value.)
The C<undef> value at the beginning of the array reference is rarely anything other
than C<undef>, but it I<could> be, if a header such as this were encountered:
X-Foo: bar=foo,foo1=10
That would return:
(bar => ["foo", foo1 => 10])
One additional thing to note is that header_words() returns the header words
for B<all> matching headers. Thus if the following two headers were set:
X-Foo: bar, bar2=foo
X-Foo: bar3
You would get the same return as if this header was set (shown above):
X-Foo: bar, bar2=foo, bar3
A good example usage of this is for a file download. To get the filename, you
would do something like:
my %cd = $header->header_words('Content-Disposition');
my $filename;
if ($cd{filename}) { $filename = $cd{filename} }
else { $filename = "unknown" }
=head2 split_words
This can be called as object method, class method, or function - it takes a
single argument, a string, which it proceeds to split up as described for the
above header_words() method. Note that this knows nothing about header names -
it simply knows how to break a header value into the above format.
This method is used internally by header_words(), but can be used separately if
desired.
=head2 contains
This method takes two arguments: a header, and a header word. It returns true
if the header word passed is found in the header specified. For example, the
following would return true:
$header->contains('X-Foo' => 'bar2')
for any of these headers:
X-Foo: bar2
X-Foo: bar, bar2, bar3
X-Foo: bar, bar2=10, bar3
X-Foo: bar, bar2=10;q=0.3, bar3
but not for either of these:
X-Foo: bar, bar3=bar2
X-Foo: bar, bar3;bar2=10
=head2 join_words
join_words() does the opposite of split_words(). That is, it takes a value such
as might be returned by split_words(), and joins it up properly, quoting if
necessary. This is called internally when creating the actual header, and can
be called separately at a method or function if desired.
=head2 delete_header_word
This takes a header and header word, and proceeds to remove any occurances of
the header word from the header specified.
After calling:
$header->delete_header_word('X-Foo', 'bar2');
this header:
X-Foo: bar, bar2;foo=bar, bar3
would become:
X-Foo: bar, bar3
=head2 delete_header
This takes a list of header names. The headers specified are completely
removed.
=head2 replace_header
This 2 or more arguments in exactly the same way as header(), however all the
specified headers are deleted (assuming they exist) before being readded.
=head2 format_headers
This returns a properly formatted (lines are CRLF delimited) header. If you
use the header as a string (i.e. C<"$header">), this method will be internally
called, and so generally does not need to be called directly.
The returned string has the final blank line that identifies the end of the
header.
=head2 clear_headers
This deletes all headers.
=head1 SEE ALSO
L<GT::WWW::http>
L<GT::WWW>
RFC 2616: L<http://www.ietf.org/rfc/rfc2616.txt>
=head1 MAINTAINER
Jason Rhinelander
=head1 COPYRIGHT
Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
http://www.gossamer-threads.com/
=head1 VERSION
Revision: $Id: Header.pm,v 1.8 2004/02/17 01:33:08 jagerman Exp $
=cut