443 lines
15 KiB
Perl
443 lines
15 KiB
Perl
# ==================================================================
|
|
# File manager - enhanced web based file management system
|
|
#
|
|
# Website : http://gossamer-threads.com/
|
|
# Support : http://gossamer-threads.com/scripts/support/
|
|
# CVS Info :
|
|
# Revision : $Id: Diff.pm,v 1.9 2004/02/17 01:33:07 jagerman Exp $
|
|
#
|
|
# Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved.
|
|
# Redistribution in part or in whole strictly prohibited. Please
|
|
# see LICENSE file for full details.
|
|
# ==================================================================
|
|
|
|
package GT::FileMan::Diff;
|
|
# ==================================================================
|
|
# This module is based off the example scripts distributed with Algorthim::Diff
|
|
#
|
|
|
|
use strict;
|
|
use vars qw($VERSION %HTML_ESCAPE);
|
|
use GT::File::Diff;
|
|
|
|
$VERSION = sprintf "%d.%03d", q$Revision: 1.9 $ =~ /(\d+)\.(\d+)/;
|
|
%HTML_ESCAPE = (
|
|
'&' => '&',
|
|
'<' => '<',
|
|
'>' => '>',
|
|
'"' => '"'
|
|
);
|
|
|
|
my $File_Length_Difference = 0;
|
|
|
|
sub diff {
|
|
# -----------------------------------------------------------------------------
|
|
# Takes two filenames, or two array refs, and returns a text diff. See also
|
|
# html_diff. Optionally takes an additional number - if provided, you'll get
|
|
# a unified context diff with however many lines of context as you passed in for
|
|
# this value, otherwise you'll get a boring old <, >-type diff.
|
|
# Returns 1 if the first file couldn't be opened, 2 if the second couldn't be
|
|
# opened, and a scalar reference containing the diff otherwise.
|
|
#
|
|
my ($file1, $file2, $context_lines) = @_;
|
|
my ($f1_mod, $f2_mod, $filename1, $filename2);
|
|
|
|
if (!ref $file1) {
|
|
my $fh = \do { local *FH; *FH };
|
|
open $fh, "<$file1" or return 1;
|
|
chomp(my @f1 = <$fh>);
|
|
$f1_mod = (stat $fh)[9];
|
|
($filename1, $file1) = ($file1, \@f1);
|
|
}
|
|
if (!ref $file2) {
|
|
my $fh = \do { local *FH; *FH };
|
|
open $fh, "<$file2" or return 2;
|
|
chomp(my @f2 = <$fh>);
|
|
$f2_mod = (stat $fh)[9];
|
|
($filename2, $file2) = ($file2, \@f2);
|
|
}
|
|
|
|
my $ret = "";
|
|
my $diff = GT::File::Diff::diff($file1, $file2, \&_hash);
|
|
return \($ret = "Files are identical") if not @$diff;
|
|
|
|
if ($context_lines and $f1_mod and $f2_mod) {
|
|
$ret .= "--- $filename1\t" . gmtime($f1_mod) . " -0000\n";
|
|
$ret .= "+++ $filename2\t" . gmtime($f2_mod) . " -0000\n";
|
|
}
|
|
|
|
$File_Length_Difference = 0;
|
|
|
|
my ($hunk, $oldhunk);
|
|
for my $piece (@$diff) {
|
|
$hunk = GT::FileMan::Diff::Hunk->new($file1, $file2, $piece, $context_lines);
|
|
next unless $oldhunk;
|
|
|
|
if ($context_lines and $hunk->does_overlap($oldhunk)) {
|
|
$hunk->prepend_hunk($oldhunk);
|
|
}
|
|
else {
|
|
$ret .= $oldhunk->output_diff($file1, $file2, $context_lines);
|
|
}
|
|
} continue { $oldhunk = $hunk }
|
|
|
|
$ret .= $oldhunk->output_diff($file1, $file2, $context_lines);
|
|
\$ret;
|
|
}
|
|
|
|
# This generates a unique key for the line; we simply take the line and convert
|
|
# all multiple spaces into a single space to effectively perform a "diff -b".
|
|
sub _hash {
|
|
my $str = shift;
|
|
$str =~ s/^\s+//;
|
|
$str =~ s/\s+$//;
|
|
$str =~ s/\s{2,}/ /g;
|
|
$str;
|
|
}
|
|
|
|
sub html_diff {
|
|
# -----------------------------------------------------------------------------
|
|
# Works exactly as the above, but also HTML escapes and colorizes the diff.
|
|
# The first two or three arguments are the same as above, and the last argument
|
|
# is a hash ref of (ID => html_color) pairs. The ID's available, and defaults,
|
|
# are as follows (scalar refs make the text also bold):
|
|
# { file => \"#2e8b57", linenum => \"#a52a2a", sep => "#6a5acd", removed => "#6a5acd", added => "#008b8b" }
|
|
# - file is used only in unified context diffs to show the filename & last modified time
|
|
# - linenum is used to indicate the line numbers the change applies to
|
|
# - sep is used only in non-unified diffs to separate the removed/added lines
|
|
# - removed is the colour for removed lines
|
|
# - added is the colour for added lines
|
|
# The return is the same scalar reference or error number as that of diff(),
|
|
# but formatted for HTML with escaped HTML where necessary and the whole thing
|
|
# wrapped in <pre>...</pre>. Note that no checking or HTML escaping is
|
|
# performed on the colors passed in; it is your responsibility to make sure the
|
|
# values of the colors hash are safe.
|
|
#
|
|
my (@args) = @_;
|
|
my %colors;
|
|
%colors = %{pop @args} if ref $args[-1];
|
|
|
|
$colors{file} ||= \"#2e8b57";
|
|
$colors{linenum} ||= \"#a52a2a";
|
|
$colors{added} ||= "#008b8b";
|
|
$colors{removed} ||= "#6a5acd";
|
|
$colors{sep} ||= "#6a5acd";
|
|
|
|
for (keys %colors) {
|
|
if (ref $colors{$_}) {
|
|
$colors{$_} = qq|<font color="${$colors{$_}}"><b>|;
|
|
$colors{"${_}_close"} = qq|</b></font>|;
|
|
}
|
|
else {
|
|
$colors{$_} = qq|<font color="$colors{$_}">|;
|
|
$colors{"${_}_close"} = qq|</font>|;
|
|
}
|
|
}
|
|
|
|
my $ret = diff(@args);
|
|
return $ret unless ref $ret;
|
|
|
|
$$ret =~ s/(["&<>])/$HTML_ESCAPE{$1}/g;
|
|
$$ret =~ s{^([^ ].*)}{
|
|
my $line = $1;
|
|
if ($line eq '---') {
|
|
qq{$colors{sep}$line$colors{sep_close}}
|
|
}
|
|
elsif (substr($line, 0, 3) eq '---' or substr($line, 0, 3) eq '+++') {
|
|
qq{$colors{file}$line$colors{file_close}}
|
|
}
|
|
elsif (substr($line, 0, 2) eq '@@' or $line =~ /^[0-9]/) {
|
|
qq{$colors{linenum}$line$colors{linenum_close}}
|
|
}
|
|
elsif (substr($line, 0, 1) eq '+' or substr($line, 0, 4) eq '>') {
|
|
qq{$colors{added}$line$colors{added_close}}
|
|
}
|
|
elsif (substr($line, 0, 1) eq '-' or substr($line, 0, 4) eq '<') {
|
|
qq{$colors{removed}$line$colors{removed_close}}
|
|
}
|
|
else {
|
|
# A mistake? We should never get here, but silently ignore if we do
|
|
$line
|
|
}
|
|
}egm;
|
|
|
|
substr($$ret, 0, 0) = '<pre>';
|
|
$$ret .= '</pre>';
|
|
|
|
$ret;
|
|
}
|
|
|
|
|
|
# Package Hunk. A Hunk is a group of Blocks which overlap because of the
|
|
# context surrounding each block. (So if we're not using context, every
|
|
# hunk will contain one block.)
|
|
package GT::FileMan::Diff::Hunk;
|
|
|
|
sub new {
|
|
# Arg1 is output from &LCS::diff (which corresponds to one Block)
|
|
# Arg2 is the number of items (lines, e.g.,) of context around each block
|
|
#
|
|
# This subroutine changes $File_Length_Difference
|
|
#
|
|
# Fields in a Hunk:
|
|
# blocks - a list of Block objects
|
|
# start - index in file 1 where first block of the hunk starts
|
|
# end - index in file 1 where last block of the hunk ends
|
|
#
|
|
# Variables:
|
|
# before_diff - how much longer file 2 is than file 1 due to all hunks
|
|
# until but NOT including this one
|
|
# after_diff - difference due to all hunks including this one
|
|
my ($class, $f1, $f2, $piece, $context_items) = @_;
|
|
|
|
my $block = new GT::FileMan::Diff::Block ($piece); # this modifies $FLD!
|
|
|
|
my $before_diff = $File_Length_Difference; # BEFORE this hunk
|
|
my $after_diff = $before_diff + $block->{"length_diff"};
|
|
$File_Length_Difference += $block->{"length_diff"};
|
|
|
|
# @remove_array and @insert_array hold the items to insert and remove
|
|
# Save the start & beginning of each array. If the array doesn't exist
|
|
# though (e.g., we're only adding items in this block), then figure
|
|
# out the line number based on the line number of the other file and
|
|
# the current difference in file lenghts
|
|
my @remove_array = $block->remove;
|
|
my @insert_array = $block->insert;
|
|
my ($a1, $a2, $b1, $b2, $start1, $start2, $end1, $end2);
|
|
$a1 = @remove_array ? $remove_array[0 ]->{"item_no"} : -1;
|
|
$a2 = @remove_array ? $remove_array[-1]->{"item_no"} : -1;
|
|
$b1 = @insert_array ? $insert_array[0 ]->{"item_no"} : -1;
|
|
$b2 = @insert_array ? $insert_array[-1]->{"item_no"} : -1;
|
|
|
|
$start1 = $a1 == -1 ? $b1 - $before_diff : $a1;
|
|
$end1 = $a2 == -1 ? $b2 - $after_diff : $a2;
|
|
$start2 = $b1 == -1 ? $a1 + $before_diff : $b1;
|
|
$end2 = $b2 == -1 ? $a2 + $after_diff : $b2;
|
|
|
|
# At first, a hunk will have just one Block in it
|
|
my $hunk = {
|
|
"start1" => $start1,
|
|
"start2" => $start2,
|
|
"end1" => $end1,
|
|
"end2" => $end2,
|
|
"blocks" => [$block],
|
|
"f1" => $f1,
|
|
"f2" => $f2
|
|
};
|
|
bless $hunk, $class;
|
|
|
|
$hunk->flag_context($context_items);
|
|
|
|
return $hunk;
|
|
}
|
|
|
|
# Change the "start" and "end" fields to note that context should be added
|
|
# to this hunk
|
|
sub flag_context {
|
|
my ($hunk, $context_items) = @_;
|
|
return unless $context_items; # no context
|
|
|
|
# add context before
|
|
my $start1 = $hunk->{"start1"};
|
|
my $num_added = $context_items > $start1 ? $start1 : $context_items;
|
|
$hunk->{"start1"} -= $num_added;
|
|
$hunk->{"start2"} -= $num_added;
|
|
|
|
# context after
|
|
my $end1 = $hunk->{"end1"};
|
|
$num_added = ($end1+$context_items > $#{$hunk->{f1}}) ?
|
|
$#{$hunk->{f1}} - $end1 :
|
|
$context_items;
|
|
$hunk->{"end1"} += $num_added;
|
|
$hunk->{"end2"} += $num_added;
|
|
}
|
|
|
|
# Is there an overlap between hunk arg0 and old hunk arg1?
|
|
# Note: if end of old hunk is one less than beginning of second, they overlap
|
|
sub does_overlap {
|
|
my ($hunk, $oldhunk) = @_;
|
|
return "" unless $oldhunk; # first time through, $oldhunk is empty
|
|
|
|
# Do I actually need to test both?
|
|
return ($hunk->{"start1"} - $oldhunk->{"end1"} <= 1 ||
|
|
$hunk->{"start2"} - $oldhunk->{"end2"} <= 1);
|
|
}
|
|
|
|
# Prepend hunk arg1 to hunk arg0
|
|
# Note that arg1 isn't updated! Only arg0 is.
|
|
sub prepend_hunk {
|
|
my ($hunk, $oldhunk) = @_;
|
|
|
|
$hunk->{"start1"} = $oldhunk->{"start1"};
|
|
$hunk->{"start2"} = $oldhunk->{"start2"};
|
|
|
|
unshift (@{$hunk->{"blocks"}}, @{$oldhunk->{"blocks"}});
|
|
}
|
|
|
|
|
|
# DIFF OUTPUT ROUTINES. THESE ROUTINES CONTAIN DIFF FORMATTING INFO...
|
|
sub output_diff {
|
|
my $context_diff = $_[3];
|
|
if ($context_diff) { return &output_unified_diff }
|
|
else { return &output_boring_diff }
|
|
}
|
|
|
|
sub output_unified_diff {
|
|
my ($hunk, $fileref1, $fileref2) = @_;
|
|
my @blocklist;
|
|
my $ret = "";
|
|
|
|
# Calculate item number range.
|
|
my $range1 = $hunk->unified_range(1);
|
|
my $range2 = $hunk->unified_range(2);
|
|
$ret .= "@@ -$range1 +$range2 @@\n";
|
|
|
|
# Outlist starts containing the hunk of file 1.
|
|
# Removing an item just means putting a '-' in front of it.
|
|
# Inserting an item requires getting it from file2 and splicing it in.
|
|
# We splice in $num_added items. Remove blocks use $num_added because
|
|
# splicing changed the length of outlist.
|
|
# We remove $num_removed items. Insert blocks use $num_removed because
|
|
# their item numbers---corresponding to positions in file *2*--- don't take
|
|
# removed items into account.
|
|
my $low = $hunk->{"start1"};
|
|
my $hi = $hunk->{"end1"};
|
|
my ($num_added, $num_removed) = (0,0);
|
|
my @outlist = @$fileref1[$low..$hi];
|
|
for (@outlist) { s/^/ / } # assume it's just context
|
|
|
|
foreach my $block (@{$hunk->{"blocks"}}) {
|
|
foreach my $item ($block->remove) {
|
|
my $op = $item->{"sign"}; # -
|
|
my $offset = $item->{"item_no"} - $low + $num_added;
|
|
$outlist[$offset] =~ s/^ /$op/;
|
|
$num_removed++;
|
|
}
|
|
foreach my $item ($block->insert) {
|
|
my $op = $item->{"sign"}; # +
|
|
my $i = $item->{"item_no"};
|
|
my $offset = $i - $hunk->{"start2"} + $num_removed;
|
|
splice(@outlist,$offset,0,"$op$$fileref2[$i]");
|
|
$num_added++;
|
|
}
|
|
}
|
|
|
|
for (@outlist) { $ret .= "$_\n" } # add \n's
|
|
$ret;
|
|
}
|
|
|
|
sub output_boring_diff {
|
|
# Note that an old diff can't have any context. Therefore, we know that
|
|
# there's only one block in the hunk.
|
|
my ($hunk, $fileref1, $fileref2) = @_;
|
|
my %op_hash = ('+' => 'a', '-' => 'd', '!' => 'c');
|
|
|
|
my $ret = '';
|
|
my @blocklist = @{$hunk->{"blocks"}};
|
|
warn ("Expecting one block in an old diff hunk!") if scalar @blocklist != 1;
|
|
my $block = $blocklist[0];
|
|
my $op = $block->op; # +, -, or !
|
|
|
|
# Calculate item number range.
|
|
# old diff range is just like a context diff range, except the ranges
|
|
# are on one line with the action between them.
|
|
my $range1 = $hunk->context_range(1);
|
|
my $range2 = $hunk->context_range(2);
|
|
my $action = $op_hash{$op} || warn "unknown op $op";
|
|
$ret .= "$range1$action$range2\n";
|
|
|
|
# If removing anything, just print out all the remove lines in the hunk
|
|
# which is just all the remove lines in the block
|
|
if (my @foo = $block->remove) {
|
|
my @outlist = @$fileref1[$hunk->{"start1"}..$hunk->{"end1"}];
|
|
map {$_ = "< $_\n"} @outlist; # all lines will be '< text\n'
|
|
$ret .= join '', @outlist;
|
|
}
|
|
|
|
$ret .= "---\n" if $op eq '!'; # only if inserting and removing
|
|
if ($block->insert) {
|
|
my @outlist = @$fileref2[$hunk->{"start2"}..$hunk->{"end2"}];
|
|
map {$_ = "> $_\n"} @outlist; # all lines will be '> text\n'
|
|
$ret .= join "", @outlist;
|
|
}
|
|
}
|
|
|
|
sub context_range {
|
|
# Generate a range of item numbers to print. Only print 1 number if the range
|
|
# has only one item in it. Otherwise, it's 'start,end'
|
|
my ($hunk, $flag) = @_;
|
|
my ($start, $end) = ($hunk->{"start$flag"},$hunk->{"end$flag"});
|
|
$start++; $end++; # index from 1, not zero
|
|
my $range = ($start < $end) ? "$start,$end" : $end;
|
|
return $range;
|
|
}
|
|
|
|
sub unified_range {
|
|
# Generate a range of item numbers to print for unified diff
|
|
# Print number where block starts, followed by number of lines in the block
|
|
# (don't print number of lines if it's 1)
|
|
my ($hunk, $flag) = @_;
|
|
my ($start, $end) = ($hunk->{"start$flag"},$hunk->{"end$flag"});
|
|
$start++; $end++; # index from 1, not zero
|
|
my $length = $end - $start + 1;
|
|
my $first = $length < 2 ? $end : $start; # strange, but correct...
|
|
my $range = $length== 1 ? $first : "$first,$length";
|
|
return $range;
|
|
}
|
|
|
|
package GT::FileMan::Diff::Block;
|
|
# Package Block. A block is an operation removing, adding, or changing
|
|
# a group of items. Basically, this is just a list of changes, where each
|
|
# change adds or deletes a single item.
|
|
# (Change could be a separate class, but it didn't seem worth it)
|
|
|
|
sub new {
|
|
# Input is a chunk from &Algorithm::LCS::diff
|
|
# Fields in a block:
|
|
# length_diff - how much longer file 2 is than file 1 due to this block
|
|
# Each change has:
|
|
# sign - '+' for insert, '-' for remove
|
|
# item_no - number of the item in the file (e.g., line number)
|
|
# We don't bother storing the text of the item
|
|
#
|
|
my ($class,$chunk) = @_;
|
|
my @changes = ();
|
|
|
|
# This just turns each change into a hash.
|
|
foreach my $item (@$chunk) {
|
|
my ($sign, $item_no, $text) = @$item;
|
|
my $hashref = {"sign" => $sign, "item_no" => $item_no};
|
|
push @changes, $hashref;
|
|
}
|
|
|
|
my $block = { "changes" => \@changes };
|
|
bless $block, $class;
|
|
|
|
$block->{"length_diff"} = $block->insert - $block->remove;
|
|
return $block;
|
|
}
|
|
|
|
|
|
# LOW LEVEL FUNCTIONS
|
|
sub op {
|
|
# what kind of block is this?
|
|
my $block = shift;
|
|
my $insert = $block->insert;
|
|
my $remove = $block->remove;
|
|
|
|
$remove && $insert and return '!';
|
|
$remove and return '-';
|
|
$insert and return '+';
|
|
warn "unknown block type";
|
|
return '^'; # context block
|
|
}
|
|
|
|
# Returns a list of the changes in this block that remove items
|
|
# (or the number of removals if called in scalar context)
|
|
sub remove { return grep {$_->{"sign"} eq '-'} @{shift->{"changes"}}; }
|
|
|
|
# Returns a list of the changes in this block that insert items
|
|
sub insert { return grep {$_->{"sign"} eq '+'} @{shift->{"changes"}}; }
|
|
|
|
1;
|