discourse-legacysite-perl/site/slowtwitch.com/cgi-bin/articles/GT/Tar.pm
2024-06-17 21:49:12 +10:00

1190 lines
38 KiB
Perl

# ==================================================================
# Gossamer Threads Module Library - http://gossamer-threads.com/
#
# GT::Tar
# Author: Scott Beck
# CVS Info : 087,071,086,086,085
# $Id: Tar.pm,v 1.57 2006/08/28 23:17:11 brewt Exp $
#
# Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
# ==================================================================
#
# Description: A general purpose taring and untaring module.
#
package GT::Tar;
# ==================================================================
# Pragmas
use vars qw/$DEBUG $ERRORS $FAKE_GETPWUID $HAVE_GZIP $FAKE_GETGRGID $FH/;
use strict;
# System modules
use Fcntl;
use Symbol qw/gensym/;
use constants
BLOCK => 4096,
# 'Type' constants - these are the actual chars, not the char values
FILE => 0,
HARDLINK => 1,
SYMLINK => 2,
CHARDEV => 3,
BLOCKDEV => 4,
DIR => 5,
FIFO => 6,
SOCKET => 8,
UNKNOWN => 9,
LONGNAME => 'L',
FORMAT_HEADER_PACK => 'a100 a8 a8 a8 a12 a12 A8 a1 a100 a6 a2 a32 a32 a8 a8 a155 x12',
FORMAT_HEADER_UNPACK => 'A100 A8 A8 A8 A12 A12 A8 A1 A100 A6 A2 A32 A32 A8 A8 A155';
# Internal modules
use GT::Base;
# Globals
$DEBUG = 0;
@GT::Tar::ISA = qw{GT::Base};
$ERRORS = {
OPEN => "Could not open '%s': %s",
READ => "There was an error reading from '%s'. Expected to read %s bytes, but only got %s",
BINMODE => "Could not binmode '%s': %s",
BADARGS => "Bad arguments passed to %s: %s",
CHECKSUM => "Checksum error parsing tar file. Most likely this is a corrupt tar.\nHeader: %s\nChecksum: %s\nFile: %s\n",
NOBODY => "File '%s' has no body!",
CANTFIND => "Unable to find a file named '%s' in tar archive",
CHMOD => "Could not chmod '%s': %s",
DIRFILE => "Unable to create directory: '%s' exists and is a file",
MKDIR => "Could not mkdir '%s': %s",
RENAME => "Unable to rename temporary file '%s' to '%s': %s",
NOGZIP => "Compress::Zlib module is required to work with .tar.gz files"
};
$FAKE_GETPWUID = "unknown" if ($^O eq 'MSWin32');
$FAKE_GETGRGID = "unknown" if ($^O eq 'MSWin32');
$HAVE_GZIP = eval { local $SIG{__DIE__}; require Compress::Zlib; 1; } ? 1 : 0;
$FH = 0;
sub new {
# ------------------------------------------------------------------------------
# GT::Tar->new('/path/to/new/tar.tar');
# --------------------------------------
# Constructor for GT::Tar. Call this method to create a new archive.
# To do anything with an existing archive call GT::Tar->open.
#
my $this = shift;
my $class = ref $this || $this;
my $self = bless {}, $class;
my $opt = {};
if (@_ == 1) { $opt->{io} = shift }
else {
$opt = $self->common_param(@_);
}
$self->{_debug} = exists $opt->{debug} ? $opt->{debug} : $DEBUG;
$opt->{io} or return $self->fatal(BADARGS => "new()", "No output archive passed in");
$opt->{io} =~ /^(.+)$/;
$self->{clean} = exists $opt->{clean} ? $opt->{clean} : 1;
my $file = $1;
# If it's a gz file, store the name in gz_file, and work off a temp file.
if ($file =~ /\.t?gz$/) {
$HAVE_GZIP or return $self->warn('NOGZIP');
require GT::TempFile;
my $tmpfile = new GT::TempFile;
$self->{file} = $$tmpfile; # Filename of ungzipped tar file.
$self->{gz_file} = $file; # Filename of gzipped file.
$self->{tmp_file} = $tmpfile; # Don't unlink it till the object is destroyed.
}
else {
$self->{file} = $file;
}
$self->{io} = gensym;
sysopen $self->{io}, $self->{file}, O_CREAT|O_TRUNC|O_RDWR or return $self->fatal(OPEN => $self->{file}, "$!");
binmode $self->{io} or return $self->fatal(BINMODE => $self->{file}, "$!");
select((select($self->{io}), $| = 1)[0]);
$self->{parsed} = 0;
$self->{new_tar} = 1;
return $self;
}
sub open {
# ------------------------------------------------------------------------------
# GT::Tar->open('/path/to/tar.tar');
# -----------------------------------
# Opens the tar specified by the first argument for reading and calls
# $obj->parse to parse the contents.
# Returns a new GT::Tar object.
#
my $this = shift;
my $class = ref $this || $this;
my $self = bless {}, $class;
my $opt = {};
if (@_ == 1) { $opt->{io} = shift }
else {
$opt = $self->common_param(@_);
}
$self->{_debug} = exists $opt->{debug} ? $opt->{debug} : $DEBUG;
$opt->{io} or return $self->fatal(BADARGS => "open()", "No input archive passed in");
$opt->{io} =~ /^(.+)$/;
my $file = $1;
# If it's a gz file, uncompress it to a temp file and work off that.
if ($file =~ /\.t?gz$/) {
$HAVE_GZIP or return $self->warn('NOGZIP');
require GT::TempFile;
my $tmpfile = new GT::TempFile;
$self->debug("Decompressing gz file to temp file: $$tmpfile") if ($self->{_debug});
open(FH, "> $$tmpfile") or return $self->warn(OPEN => $$tmpfile, "$!");
binmode FH;
my $gz = Compress::Zlib::gzopen($file, 'rb') or return $self->warn(OPEN => $file, $Compress::Zlib::gzerrno);
my $line;
while ($gz->gzreadline($line)) {
print FH $line;
}
close FH;
$gz->gzclose;
$self->{file} = $$tmpfile; # Filename of open ungzipped tar file.
$self->{gz_file} = $file; # Filename of original gzipped file.
$self->{tmp_file} = $tmpfile; # Don't unlink it till the object is destroyed.
}
else {
$self->{file} = $file;
}
$self->{io} = gensym;
$self->debug("Opening $file") if ($self->{_debug});
sysopen $self->{io}, $self->{file}, O_RDONLY or return $self->warn(OPEN => $self->{file}, "$!");
binmode $self->{io} or return $self->warn(BINMODE => $self->{file} => "$!");
select((select($self->{io}), $| = 1)[0]);
my $parts = $self->parse;
defined $parts or return;
$self->{new_tar} = 0;
return $self;
}
sub close_tar {
# ------------------------------------------------------------------------------
# Closes the tar file.
#
my $self = shift;
$self->{parsed} = 0;
close $self->{io} if ($self->{io} and defined fileno($self->{io}));
}
sub DESTROY { my $self = shift; $self->close_tar; }
sub parse {
# ------------------------------------------------------------------------------
# Modified from code in Archive::Tar
# Untar a file, specified by first argument to directories, specified in third
# argument, and set the path to perl, specified in second argument, to all .pl
# and .cgi files
#
my $self = shift;
$self->{parts} = [];
my ($head, $msg);
my $tar = $self->{io}
or return $self->fatal(BADARGS => "parse", "An IO must be defined to parse");
seek($tar, 0, 0);
my $longname;
READLOOP: while (read($tar, $head, 512) and length($head) == 512) {
# End of archive
last READLOOP if $head eq "\0" x 512;
# Apparently this should really be two blocks of 512 zeroes, but GNU tar
# sometimes gets it wrong. See comment in the source code (tar.c) to GNU cpio.
my $file = GT::Tar::Parts->format_read($head);
$self->debug("Looking at $file->{name}") if ($self->{_debug});
substr($head, 148, 8) = " ";
if ($file->{type} eq FILE) {
if (unpack("%16C*", $head) != $file->{chksum}) {
return $self->warn(CHECKSUM => $head, $file->{chksum}, $file->{name});
}
}
if ($file->{type} eq FILE || $file->{type} eq LONGNAME) {
# Find the start and the end positions in the tar file for the body of the tar
# part
my $start = tell $tar;
seek($tar, $file->{size}, 1);
$file->body([$tar, $start]);
# Seek off trailing garbage.
my $block = $file->{size} & 0x01ff ? ($file->{size} & ~0x01ff) + 512 : $file->{size};
my $to_read = $block - $file->{size};
if ($to_read) { seek($tar, $to_read, 1) }
}
if ($longname and $file->{type} ne LONGNAME) {
my $filename = $longname->body_as_string;
$filename =~ s|\0.*||s;
$file->name($filename);
}
$file->name(_clean($file->name)) if $self->{clean};
# Guard against tarfiles with garbage at the end
last READLOOP if $file->{name} eq '';
if ($file->{type} eq LONGNAME) {
$longname = $file;
}
else {
$longname = undef;
# Sanity check - make sure a file doesn't end in a /
if ($file->{type} == GT::Tar::FILE and $file->{name} =~ m|/$|) {
$file->{type} = GT::Tar::DIR;
}
push(@{$self->{parts}}, $file);
}
}
$self->{parsed} = 1;
seek($tar, 0, 0);
return wantarray ? @{$self->{parts}} : $self->{parts};
}
sub _clean {
# -----------------------------------------------------------------------------
# Sanitises a path, removing anything up to a .. path component, and removing
# any leading /'s.
#
my $path = shift;
$path =~ s#.*(?:^|/)\.\.(/|$)#$1#;
$path =~ s#^/+##;
$path;
}
sub untar {
# -----------------------------------------------------------------------------
# $obj->untar(\&code);
# ---------------------
# Untars tar file specified in $obj->open and runs callback for each entry in
# the tar file. Passed a parts object to that callback.
#
# $obj->untar;
# ------------
# Same a above but no callback.
#
# GT::Tar->untar('/path/to/tar.tar', \&code);
# -------------------------------------------
# Untars file specified by the first argument and runs callback in second
# argument.
#
# GT::Tar->untar('/path/to/tar.tar', '/path/for/extraction');
# -----------------------------------------------------------
# Untars tar file specified in first argument into path specified in second
# argument.
#
my $self = ref $_[0] eq __PACKAGE__ ? shift : shift->open(shift);
my $callback = pop;
if (ref $callback) {
(ref $callback eq 'CODE')
or return $self->fatal(BADARGS => "untar", "Callback that was passed in was not a code ref");
}
elsif ($callback) {
-d $callback or return $self->fatal(BADARGS => untar => "Extraction path '$callback' does not exist");
}
if (!$self->{parsed}) {
$self->debug("Parsing tar file") if ($self->{_debug});
$self->parse or return;
}
else {
$self->debug("Already parsed") if ($self->{_debug});
}
for (@{$self->{parts}}) {
if (ref $callback eq 'CODE') {
$callback->($_);
}
else {
$_->write($callback || ());
}
}
return $self;
}
sub tar {
# ------------------------------------------------------------------------------
# $obj->tar;
# ----------
# Creates tar file that was specified in $obj->new with files that were added
# using $obj->add.
#
# GT::Tar->tar('/path/to/tar.tar', @files);
# ------------------------------------------
# Creates tar file specified by the first argument with the files specified
# by the remaining arguments.
#
my $self;
if (ref $_[0] eq __PACKAGE__) {
$self = shift;
}
else {
my $class = shift;
$self = $class->new( io => shift );
$self->add(@_) if (@_);
}
$self->write;
}
sub write {
# ------------------------------------------------------------------------------
# $obj->write;
# ------------
# Creates all the files that are internally in the parts objects. You add
# files to parts by calling $obj->add -or- by calling $obj->open on an
# existing tar file. This is similar to untar.
#
my $self = shift;
my ($out, $rename, $filename);
# Working off an existing tar file.
if (! $self->{new_tar}) {
if (@_) {
$filename = shift;
# If we have a new .tar.gz file, we need to write it to a tmp .tar first.
if ($filename =~ /\.t?gz$/) {
$HAVE_GZIP or return $self->warn('NOGZIP');
$self->{gz_file} = $filename;
undef $filename;
}
}
if (! $filename) {
require GT::TempFile;
my $tmp = new GT::TempFile;
$filename = $$tmp;
$rename = $self->{file};
}
$out = gensym;
sysopen $out, $filename, O_CREAT|O_TRUNC|O_RDWR or return $self->warn(OPEN => $filename, "$!");
binmode $out or return $self->fatal(BINMODE => $filename, "$!");
}
# Working off a new tar file.
else {
$out = $self->{io};
seek($out, 0, 0);
}
# Unbuffer output
select((select($out), $| = 1)[0]);
foreach my $entry (@{$self->{parts}}) {
my $head = $entry->format_write;
print $out $head;
my $save = tell $out;
if ($entry->type == FILE) {
my $bh;
my $body = $entry->body or return $self->warn(NOBODY => $entry->name);
my $ref = ref $body;
if ($ref eq 'GLOB' and defined fileno $body) {
my $fh = $body;
my $pos = tell $fh;
binmode $fh;
while (read $fh, $_, BLOCK) {
print $out $_;
}
seek($fh, $pos, 0);
}
elsif ($ref eq 'ARRAY') {
my ($reads, $rem, $data, $pos);
my ($fh, $start) = @{$body};
$pos = tell $fh;
seek($fh, $start, 0);
binmode $fh;
$reads = int($entry->{size} / BLOCK);
$rem = $entry->{size} % BLOCK;
for (1 .. $reads) {
my $read = read($fh, $data, BLOCK);
($read == BLOCK)
or return $self->warn(READ => join(',' => @{$body}), BLOCK, $read);
print $out $data;
}
if ($rem) {
my $read = read($fh, $data, $rem);
($read == $rem)
or return $self->warn(READ => join(',' => @{$body}), $rem, $read);
print $out $data;
}
seek($fh, $pos, 0);
}
elsif ($ref eq 'SCALAR') {
CORE::open F, ${$body} or return $self->warn(READOPEN => ${$body}, "$!");
binmode F;
while (read F, $_, BLOCK) {
print $out $_;
}
close F;
}
else {
print $out $body;
}
my $size = $entry->{size} & 511;
if ($size) {
print $out ("\0" x (512 - $size));
}
$entry->body( [ $out, $save ] );
}
}
print $out ("\0" x 1024);
# Copy the temp file over to the original file (can't rename across filesystems).
if ($rename and !$self->{gz_file}) {
seek($out, 0, 0);
$self->{io} = gensym;
sysopen($self->{io}, $rename, O_CREAT|O_TRUNC|O_RDWR) or return $self->warn(OPEN => $rename, "$!");
binmode $self->{io};
while (read($out, my $buffer, BLOCK)) {
print {$self->{io}} $buffer;
}
seek($self->{io}, 0, 0);
# Need to set the parts to the new file handle.
foreach my $entry (@{$self->{parts}}) {
if ($entry->type == FILE) {
$entry->{body}->[0] = $self->{io};
}
}
close $out;
$out = $self->{io};
$self->{file} = $rename;
unlink $filename or return $self->warn(UNLINK => $filename, "$!");
}
# Recompress if it was a .gz file.
if ($self->{gz_file}) {
$HAVE_GZIP or return $self->warn('NOGZIP');
seek($out, 0, 0);
my $gz = Compress::Zlib::gzopen($self->{gz_file}, 'wb') or return $self->warn(OPEN => $self->{gz_file}, $Compress::Zlib::gzerrno);
while (read($out, my $buffer, BLOCK)) {
$gz->gzwrite($buffer);
}
$gz->gzclose();
seek($out, 0, 0);
}
return 1;
}
sub extract {
# ------------------------------------------------------------------------------
# $obj->extract(@list);
# ----------------------
# $obj->extract(\@list);
# -----------------------
# Extracts only the files specified in @list from the working tar file. No
# files are extracted if none are in memory.
#
my $self = shift;
my %files = map { $_ => 1 } ref($_[0]) eq 'ARRAY' ? @{$_[0]} : @_;
my $num = '0E0';
foreach my $entry (@{$self->{parts}}) {
next unless (exists $files{$entry->{name}});
$entry->write;
$num++;
}
return $num;
}
sub add_file {
# ------------------------------------------------------------------------------
# $obj->add_file(@list);
# ------------------
# $obj->add_file(\@list);
# -------------------
# Adds the files specified in @list to the in-memory archive.
#
my $self = shift;
my @files = ref $_[0] eq 'ARRAY' ? @{$_[0]} : @_;
while (my $file = shift @files or @files) {
next if not defined $file;
my ($mode, $nlnk, $uid, $gid, $rdev, $size, $mtime, $type, $linkname);
$self->debug("Looking at $file") if ($self->{_debug});
if (($mode, $nlnk, $uid, $gid, $rdev, $size, $mtime) = (lstat $file)[2 .. 7, 9]) {
$linkname = "";
$type = filetype($file);
$linkname = readlink $file if ($type == SYMLINK);
if ($type == DIR) {
my $dir = gensym;
opendir $dir, $file or return $self->warn(OPEN => "Can't add directory '$file'", "$!");
push(@files, map { $file . '/' . $_ } grep !/^\.\.?$/, readdir $dir);
closedir $dir;
}
my $part = GT::Tar::Parts->new(
{
name => $file,
mode => $mode,
uid => $uid,
gid => $gid,
size => $size,
mtime => ($mtime | 0),
chksum => " ",
magic => "ustar",
version => "",
type => $type,
linkname => $linkname,
devmajor => 0, # We don't handle this yet
devminor => 0, # We don't handle this yet
uname => ($FAKE_GETPWUID || scalar getpwuid($uid)),
gname => ($FAKE_GETGRGID || scalar getgrgid($gid)),
prefix => "",
}
);
if ($type == FILE) {
$self->debug("Adding $file to as body") if ($self->{_debug});
$part->body(\$file);
}
push(@{$self->{parts}}, $part);
}
else {
$self->debug("Could not stat file '$file'");
}
}
return wantarray ? @{$self->{parts}} : $self->{parts};
}
sub remove_file {
# -------------------------------------------------------------------
# Takes a string and removes the file from the tar.
#
my ($self, $filename) = @_;
return unless (defined $filename);
@{$self->{parts}} = grep { $_->{name} ne $filename } @{$self->{parts}};
}
sub get_file {
# -------------------------------------------------------------------
# Returns the file object of a given file name.
#
my ($self, $filename) = @_;
return unless (defined $filename);
my @files = grep { $_->{name} eq $filename } @{$self->{parts}};
if (! @files) {
return $self->warn(CANTFIND => $filename);
}
return wantarray ? @files : shift @files;
}
sub add_data {
# -------------------------------------------------------------------
# $obj->add_newfile( { ... } );
# ------------------------------
# Adds a file from a hash ref of part attributes.
#
my $self = shift;
my $part = @_ > 1 ? {@_} : shift;
ref $part eq 'HASH' or return $self->fatal(BADARGS => "Usage: \$obj->add_newfile( part options )");
defined $part->{name} or return $self->fatal(BADARGS => "You must supply a file name.");
defined $part->{body} or return $self->fatal(BADARGS => "You must supply a body for the file.");
if (ref $part->{body}) {
if (defined fileno $part->{body}) {
local $/;
my $fh = $part->{body};
$part->{body} = <$fh>;
}
else {
return $self->fatal(BADARGS => "You must supply either a scalar or a file handle to body");
}
}
my $file = GT::Tar::Parts->new({
name => $part->{name},
mode => defined $part->{mode} ? $part->{mode} : 0666 & (0777 - umask),
uid => defined $part->{uid} ? $part->{uid} : $>,
gid => defined $part->{gid} ? $part->{gid} : (split(/ /,$)))[0],
size => length $part->{body},
mtime => defined $part->{mtime} ? $part->{mtime} : time,
chksum => " ",
magic => "ustar",
version => "00",
type => FILE,
linkname => '',
devmajor => 0, # We don't handle this yet
devminor => 0, # We don't handle this yet
uname => ($FAKE_GETPWUID || scalar getpwuid(defined $part->{uid} ? int($part->{uid}) : $>)),
gname => ($FAKE_GETGRGID || scalar getgrgid(defined $part->{gid} ? int($part->{gid}) : (split(/ /,$)))[0])),
prefix => ""
});
$file->body($part->{body});
push(@{$self->{parts}}, $file);
return $file;
}
sub files {
# ------------------------------------------------------------------------------
# my @files = $obj->files;
# ------------------------
# Returns a list of the part objects that are in the in-memory archive.
# Returns an array ref in scalar context.
#
my @parts = defined $_[0]->{parts} ? @{$_[0]->{parts}} : ();
return wantarray ? @parts : \@parts;
}
sub filetype {
# ------------------------------------------------------------------------------
# Internal method. filetype -- Determine the type value for a given file
#
my $file = shift;
return SYMLINK if (-l $file); # Symlink
return FILE if (-f _); # Plain file
return DIR if (-d _); # Directory
return FIFO if (-p _); # Named pipe
return SOCKET if (-S _); # Socket
return BLOCKDEV if (-b _); # Block special
return CHARDEV if (-c _); # Character special
return UNKNOWN; # Something else (like what?)
}
package GT::Tar::Parts;
# ==================================================================
# Pragmas
use vars qw/$DEBUG $ERRORS $ATTRIBS $ERROR_MESSAGE/;
use strict;
# System modules
use Fcntl;
use Symbol qw/gensym/;
# Globals
$DEBUG = $GT::Tar::DEBUG;
@GT::Tar::Parts::ISA = qw{GT::Base};
$ATTRIBS = {
name => '',
mode => '',
uid => '',
gid => '',
size => '',
mtime => '',
chksum => " ",
type => '',
linkname => '',
magic => "ustar",
version => undef,
uname => 'unknown',
gname => 'unknown',
devmajor => 0, # We don't handle this yet
devminor => 0, # We don't handle this yet
prefix => "",
body => undef,
set_owner => 1,
set_perms => 1,
set_time => 1,
};
$ERROR_MESSAGE = 'GT::Tar';
sub format_read {
# ------------------------------------------------------------------------------
# my $obj = GT::Tar::Parts->format_read($heading);
# -------------------------------------------------
# Unpacks the string that is passed in. The string need to be a valid header
# from a single entry in a tar file. Return a new object for the Tar part.
# You will need to set the body yourself after calling this.
#
my $head_tainted = pop;
my ($head) = $head_tainted =~ /(.+)/;
my $file = {};
(
$file->{name}, $file->{mode},
$file->{uid}, $file->{gid},
$file->{size}, $file->{mtime},
$file->{chksum}, $file->{type},
$file->{linkname}, $file->{magic},
$file->{version}, $file->{uname},
$file->{gname}, $file->{devmajor},
$file->{devminor}, $file->{prefix}
) = unpack(GT::Tar::FORMAT_HEADER_UNPACK, $head);
$file->{uid} = oct $file->{uid};
$file->{gid} = oct $file->{gid};
$file->{mode} = oct $file->{mode};
$file->{size} = oct $file->{size};
$file->{mtime} = oct $file->{mtime};
$file->{chksum} = oct $file->{chksum};
$file->{devmajor} = oct $file->{devmajor};
$file->{devminor} = oct $file->{devminor};
$file->{name} = $file->{prefix} . "/" . $file->{name} if $file->{prefix};
$file->{prefix} = "";
return GT::Tar::Parts->new($file);
}
sub format_write {
# ------------------------------------------------------------------------------
# $obj->format_write;
# -------------------
# Formats the current object's header for writing to a tar file. Returns the
# formatted string. In the case of a file with a path+name longer than 100
# characters (in other words, longer than can fit in the tar's filename
# field), this actually returns a longlink header + longlink body + file
# header.
#
my $self = shift;
my $file = $self->{name};
if ($self->{type} == GT::Tar::DIR and $file !~ m,/$,) {
$file .= '/';
}
my $longlink;
if (length($file) > 100) {
my $body = $file . "\0"; # GNU tar produces a long link file with a body ending with a \0; copy it.
$longlink = pack(
GT::Tar::FORMAT_HEADER_PACK,
'././@LongLink', # Filename
sprintf('%07o', 0), # mode
sprintf('%07o', 0), # uid
sprintf('%07o', 0), # gid
sprintf('%011o', length $body), # size
sprintf('%011o', 0), # mtime
'', # checksum
GT::Tar::LONGNAME, # type
'ustar', ' ',
'root', # username (Using 'root' copied from GNU tar)
'root', # group name (Using 'root' copied from GNU tar)
'', # devmajor
'', # devminor
'' # prefix
);
substr($longlink, 148, 7) = sprintf("%06o\0", unpack("%16C*", $longlink));
$longlink .= $body;
my $pad_from = length($body) % 512;
if ($pad_from) {
$longlink .= "\0" x (512 - $pad_from);
}
}
my $header = pack(
GT::Tar::FORMAT_HEADER_PACK,
$file,
sprintf("%07o", $self->{mode}),
sprintf("%07o", $self->{uid}),
sprintf("%07o", $self->{gid}),
sprintf("%011o", $self->{type} == GT::Tar::DIR ? 0 : $self->{size}),
sprintf("%011o", $self->{mtime}),
"", #checksum field - space padded by pack("A8")
$self->{type},
$self->{linkname},
($self->{magic} eq 'ustar' and !$self->{version})
? ('ustar ', ' ') # oldgnu format, which treated magic+version as a contiguous field with a value of "ustar \0"
: ($self->{magic}, $self->{version} || '00'),
$self->{uname},
$self->{gname},
'', # sprintf("%6o ",$self->{devmajor}),
'', # sprintf("%6o ",$self->{devminor}),
'' # prefix
);
substr($header, 148, 7) = sprintf("%06o\0", unpack("%16C*", $header));
$header = $longlink . $header if $longlink;
return $header;
}
sub body {
# ------------------------------------------------------------------------------
# my $path = $obj->body;
# ----------------------
# $obj->body(\'/path/to/body');
# $obj->body("My body text.");
# -----------------------------
# Sets or gets the path to the body of this tar part. If a scalar ref is
# passed in it is considered a path to a file otherwize it is considered a
# string to write to the body when write is called.
#
my ($self, $io) = @_;
!$io and return $self->{body};
$self->{body} = $io;
my $ref = ref $io;
if ($ref eq 'GLOB' and defined fileno $io) {
$self->{size} = (lstat(${$self->{body}}))[7];
}
elsif ($ref eq 'SCALAR') {
$self->{size} = -s ${$self->{body}};
}
elsif (not $ref) {
$self->{size} = length $self->{body};
}
return $self->{body};
}
sub body_as_string {
# ------------------------------------------------------------------------------
# my $data = $obj->body_as_string;
# --------------------------------
# Returns the body of the file as a string.
#
my $self = shift;
my $data = '';
my $ref = ref $self->{body};
if ($ref eq 'GLOB' and defined fileno $self->{body}) {
my $fh = $self->{body};
my $pos = tell $fh;
seek($fh, 0, 0);
binmode $fh;
local $/;
$data = <$fh>;
seek($fh, $pos, 0);
}
elsif ($ref eq 'ARRAY') {
my ($fh, $start) = @{$self->{body}};
my $pos = tell $fh;
binmode $fh;
seek($fh, $start, 0);
read($fh, $data, $self->{size});
seek($fh, $pos, 0);
}
elsif ($ref eq 'SCALAR') {
my $fh = gensym;
open $fh, ${$self->{body}} or return $self->warn(READOPEN => ${$self->{body}}, "$!");
binmode $fh;
read($fh, $data, -s $fh);
close $fh;
}
else {
$data = $self->{body};
}
return $data;
}
sub write {
# ------------------------------------------------------------------------------
# $obj->write;
# ------------
# Writes this part to disk using the path that is in $obj->body. This function
# will recursively make the directories needed to create the structure of this
# part.
#
# An optional extraction path can be passed in - if provided, extraction will
# be based in that directory instead of the current directory.
#
my ($self, $extract_to) = @_;
# For the moment, we assume that all paths in tarfiles are given according to
# Unix standards, which they *are*, according to the tar format spec!
$self->_write_dir($extract_to) or return;
if ($self->{type} == GT::Tar::FILE) {
my $out = gensym;
my $name = ($self->{name} =~ /^(.+)$/s)[0];
$name = "$extract_to/$name" if $extract_to;
open $out, "> $name" or return $self->warn(OPEN => $name, "$!");
binmode $out or return $self->warn(BINMODE => $name => "$!");
my $ref = ref $self->{body};
if ($ref eq 'GLOB' and defined fileno $self->{body}) {
my $fh = $self->{body};
my $pos = tell $fh;
binmode $fh;
while (read $fh, $_, GT::Tar::BLOCK) {
print $out $_;
}
seek($fh, $pos, 0);
}
elsif ($ref eq 'ARRAY') {
my ($reads, $rem, $data, $pos);
my ($fh, $start) = @{$self->{body}};
$pos = tell $fh;
seek($fh, $start, 0);
binmode $fh;
$reads = int($self->{size} / GT::Tar::BLOCK);
$rem = $self->{size} % GT::Tar::BLOCK;
for (1 .. $reads) {
my $read = read($fh, $data, GT::Tar::BLOCK);
($read == GT::Tar::BLOCK)
or return $self->warn(READ => join(',' => @{$self->{body}}), GT::Tar::BLOCK, $read);
print $out $data;
}
if ($rem) {
my $read = read($fh, $data, $rem);
($read == $rem)
or return $self->warn(READ => join(',' => @{$self->{body}}), $rem, $read);
print $out $data;
}
seek($fh, $pos, 0);
}
elsif ($ref eq 'SCALAR') {
my $fh = gensym;
open $fh, ${$self->{body}} or return $self->warn(READOPEN => ${$self->{body}}, "$!");
binmode $fh;
while (read $fh, $_, GT::Tar::BLOCK) {
print $out $_;
}
close $fh;
}
else {
print $out $self->{body};
}
close $out;
$self->debug("Created $self->{name} size $self->{size}") if ($self->{_debug});
}
$self->_file_sets;
return 1;
}
sub _write_dir {
# ------------------------------------------------------------------------------
# Internal method used to create a directory for a file, or just create a
# directory if this is a directory part and the directory does not exist.
#
my ($self, $base_dir) = @_;
my $name = $self->{name};
$name = "$base_dir/$name" if defined $base_dir;
if ($self->{type} == GT::Tar::DIR) {
-e $name and not -d _ and return $self->fatal(DIRFILE => $name);
-d _ or $self->_recurse_mkdir($base_dir) or return;
}
else {
$self->_recurse_mkdir($base_dir) or return;
}
return 1;
}
sub _recurse_mkdir {
# -----------------------------------------------------------------------------
# Internal method to recursivly make a directory. If the directory contains ..
# path components, everything up to the last one is removed. Likewise, a path
# starting with / has the initial / removed.
#
my ($self, $base_dir) = @_;
my $dir = $self->{name};
$dir = "$base_dir/$dir" if defined $base_dir;
my @path = split m|/|, $dir;
pop @path unless substr($dir, -1) eq '/';
my @subpath; # /foo/bar/baz/ -> ('/foo/bar/baz', '/foo/bar', '/foo', '')
for (reverse 0 .. $#path) {
push @subpath, join '/', @path[0 .. $_], '';
}
push @subpath, '.' unless substr($dir, 0, 1) eq '/' or $subpath[-1] eq '.' or $subpath[-1] eq './';
for my $i (0 .. $#subpath) {
my $path = $subpath[$i];
next if $path eq '';
if (-e $path and not -d _) { return $self->warn(DIRFILE => $path) }
elsif (-d _) {
for (reverse 0 .. $i-1) {
next if -d $subpath[$_];
mkdir $subpath[$_], 0777 or return $self->warn(MKDIR => $subpath[$_], "$!");
$self->debug("mkdir $subpath[$_]") if $DEBUG;
}
last;
}
}
return 1;
}
sub _file_sets {
# ------------------------------------------------------------------------------
# Internal method to set the file or directory permissions and or onership of
# this part.
#
my $self = shift;
# Set the file creation time.
if ($self->{set_time}) {
utime time, $self->{mtime}, $self->{name};
}
# Set the file owner.
if ($self->{set_owner}) {
$self->debug("chown ($self->{uid},$self->{gid}) $self->{name}") if ($self->{_debug});
chown($self->{uid}, $self->{gid}, $self->{name})
if ($> == 0 and $^O ne "MacOS" and $^O ne "MSWin32");
}
# Set the permissions (done last in case it makes file readonly)
if ($self->{set_perms}) {
my ($mode) = sprintf("%lo", $self->{mode}) =~ /(\d{3})$/;
$self->debug("chmod $mode, $self->{name}") if ($self->{_debug});
chmod $self->{mode}, $self->{name} or return $self->warn(CHMOD => $self->{name}, "$!");
}
return 1;
}
1;
__END__
=head1 NAME
GT::Tar - Perl module to manipulate tar files.
=head1 SYNOPSIS
use GT::Tar;
my $tar = GT::Tar->open('foo.tar');
$tar->add_file( '/path/to/file' );
$tar->write;
=head1 DESCRIPTION
GT::Tar provides an OO intefrace to a tar file. It allows you to create or edit
tar files, and if you have Compress::Zlib installed, it allows you to work with
.tar.gz files as well!
=head2 Creating a tar file
To create a tar file, you simply call:
my $tar = GT::Tar->new('filename.tar');
and then to save it:
$tar->write;
will save the tar file and any files you have added.
=head2 Opening an existing tar file
To open a tar file you call:
my $tar = GT::Tar->open('/path/to/file.tar')
or die "Can't open: $GT::Tar::error";
Note: the tar object keeps an open filehandle to the file, so if you are on
windows, you may not be able to manipulate it until you call $tar->close_tar, or
the tar object goes out of scope.
=head2 Untarring a tar file
To untar a tar file, you can simply call:
$tar->untar( \&code_ref );
or as a class method
GT::Tar->untar('/path/to/tar.tar', \&code_ref );
The code ref is optional. If provided, you will get passed in the a
GT::Tar::Part object before the file is extracted. This lets you change the
path, or alter any attributes of the file before it is saved to disk.
Alternatively, instead of a code reference you may pass an extraction path - if
passed, all files will be extracted relative to that path.
=head2 Adding files to a tar file
To add a file:
$tar->add_file( '/path/to/file' );
Note, if you add a directory, the tar module will recurse and add all files in
that directory.
To add a file that isn't saved:
$tar->add_data( name => 'Filename', body => 'File body' );
You can pass in either a scalar for the body, or an opened file handle.
=head2 Getting a list of files in a tar
To get a list of files in a tar:
my $files = $tar->files;
This returns an array ref of GT::Tar::Part objects. See below for how to access
information from a part.
Note: if you change a part, it will update the tar file if you save it.
=head2 Getting an individual file from a tar
If you know the name of the file you want:
my $file = $tar->get_file('Filename');
will return a single GT::Tar::Part object.
=head2 Removing a file from a tar
To remove a file, you need to know the name of it:
$tar->remove_file('Filename');
$tar->write;
and you need to save it before the change will take affect.
=head2 GT::Tar::Part
Each file is a separate part object. The part object has the following
attributes:
name file name
mode file permissions
uid user id
gid group id
size file size
mtime last modified time
type file type
body file body
You can access or set any of these attributes by just using the attribute name
as the method (as it inherits from L<GT::Base>).
You can also call:
$file->write;
or:
$file->write("/extraction/path")
and the file will be created with the given attributes. Basically untar just
foreach's through each of the objects and calls write() on it.
=head1 EXAMPLES
To create a new tar and add two directories to it, and save it in
'/tmp/foo.tar';
my $tar = GT::Tar->new('/tmp/foo.tar');
$tar->add_file( '/home/httpd/html' );
$tar->add_file( '/home/backup' );
$tar->write;
To open an existing tar file and save all the .pl files in /home/alex.
my $tar = GT::Tar->open('files.tar');
my $files = $tar->files;
foreach my $file (@$files) {
my $name = $file->name;
if ($name =~ m,[^/]*\.pl$,) {
$file->write("/home/alex");
}
}
=head1 COPYRIGHT
Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
http://www.gossamer-threads.com/
=head1 VERSION
Revision: $Id: Tar.pm,v 1.57 2006/08/28 23:17:11 brewt Exp $
=cut