# ================================================================== # File manager - enhanced web based file management system # # Website : http://gossamer-threads.com/ # Support : http://gossamer-threads.com/scripts/support/ # CVS Info : 087,071,086,086,085 # Revision : $Id: Diff.pm,v 1.9 2004/02/17 01:33:07 jagerman Exp $ # # Copyright (c) 2004 Gossamer Threads Inc. All Rights Reserved. # Redistribution in part or in whole strictly prohibited. Please # see LICENSE file for full details. # ================================================================== package GT::FileMan::Diff; # ================================================================== # This module is based off the example scripts distributed with Algorthim::Diff # use strict; use vars qw($VERSION %HTML_ESCAPE); use GT::File::Diff; $VERSION = sprintf "%d.%03d", q$Revision: 1.9 $ =~ /(\d+)\.(\d+)/; %HTML_ESCAPE = ( '&' => '&', '<' => '<', '>' => '>', '"' => '"' ); my $File_Length_Difference = 0; sub diff { # ----------------------------------------------------------------------------- # Takes two filenames, or two array refs, and returns a text diff. See also # html_diff. Optionally takes an additional number - if provided, you'll get # a unified context diff with however many lines of context as you passed in for # this value, otherwise you'll get a boring old <, >-type diff. # Returns 1 if the first file couldn't be opened, 2 if the second couldn't be # opened, and a scalar reference containing the diff otherwise. # my ($file1, $file2, $context_lines) = @_; my ($f1_mod, $f2_mod, $filename1, $filename2); if (!ref $file1) { my $fh = \do { local *FH; *FH }; open $fh, "<$file1" or return 1; chomp(my @f1 = <$fh>); $f1_mod = (stat $fh)[9]; ($filename1, $file1) = ($file1, \@f1); } if (!ref $file2) { my $fh = \do { local *FH; *FH }; open $fh, "<$file2" or return 2; chomp(my @f2 = <$fh>); $f2_mod = (stat $fh)[9]; ($filename2, $file2) = ($file2, \@f2); } my $ret = ""; my $diff = GT::File::Diff::diff($file1, $file2, \&_hash); return \($ret = "Files are identical") if not @$diff; if ($context_lines and $f1_mod and $f2_mod) { $ret .= "--- $filename1\t" . gmtime($f1_mod) . " -0000\n"; $ret .= "+++ $filename2\t" . gmtime($f2_mod) . " -0000\n"; } $File_Length_Difference = 0; my ($hunk, $oldhunk); for my $piece (@$diff) { $hunk = GT::FileMan::Diff::Hunk->new($file1, $file2, $piece, $context_lines); next unless $oldhunk; if ($context_lines and $hunk->does_overlap($oldhunk)) { $hunk->prepend_hunk($oldhunk); } else { $ret .= $oldhunk->output_diff($file1, $file2, $context_lines); } } continue { $oldhunk = $hunk } $ret .= $oldhunk->output_diff($file1, $file2, $context_lines); \$ret; } # This generates a unique key for the line; we simply take the line and convert # all multiple spaces into a single space to effectively perform a "diff -b". sub _hash { my $str = shift; $str =~ s/^\s+//; $str =~ s/\s+$//; $str =~ s/\s{2,}/ /g; $str; } sub html_diff { # ----------------------------------------------------------------------------- # Works exactly as the above, but also HTML escapes and colorizes the diff. # The first two or three arguments are the same as above, and the last argument # is a hash ref of (ID => html_color) pairs. The ID's available, and defaults, # are as follows (scalar refs make the text also bold): # { file => \"#2e8b57", linenum => \"#a52a2a", sep => "#6a5acd", removed => "#6a5acd", added => "#008b8b" } # - file is used only in unified context diffs to show the filename & last modified time # - linenum is used to indicate the line numbers the change applies to # - sep is used only in non-unified diffs to separate the removed/added lines # - removed is the colour for removed lines # - added is the colour for added lines # The return is the same scalar reference or error number as that of diff(), # but formatted for HTML with escaped HTML where necessary and the whole thing # wrapped in
...
. Note that no checking or HTML escaping is # performed on the colors passed in; it is your responsibility to make sure the # values of the colors hash are safe. # my (@args) = @_; my %colors; %colors = %{pop @args} if ref $args[-1]; $colors{file} ||= \"#2e8b57"; $colors{linenum} ||= \"#a52a2a"; $colors{added} ||= "#008b8b"; $colors{removed} ||= "#6a5acd"; $colors{sep} ||= "#6a5acd"; for (keys %colors) { if (ref $colors{$_}) { $colors{$_} = qq||; $colors{"${_}_close"} = qq||; } else { $colors{$_} = qq||; $colors{"${_}_close"} = qq||; } } my $ret = diff(@args); return $ret unless ref $ret; $$ret =~ s/(["&<>])/$HTML_ESCAPE{$1}/g; $$ret =~ s{^([^ ].*)}{ my $line = $1; if ($line eq '---') { qq{$colors{sep}$line$colors{sep_close}} } elsif (substr($line, 0, 3) eq '---' or substr($line, 0, 3) eq '+++') { qq{$colors{file}$line$colors{file_close}} } elsif (substr($line, 0, 2) eq '@@' or $line =~ /^[0-9]/) { qq{$colors{linenum}$line$colors{linenum_close}} } elsif (substr($line, 0, 1) eq '+' or substr($line, 0, 4) eq '>') { qq{$colors{added}$line$colors{added_close}} } elsif (substr($line, 0, 1) eq '-' or substr($line, 0, 4) eq '<') { qq{$colors{removed}$line$colors{removed_close}} } else { # A mistake? We should never get here, but silently ignore if we do $line } }egm; substr($$ret, 0, 0) = '
';
    $$ret .= '
'; $ret; } # Package Hunk. A Hunk is a group of Blocks which overlap because of the # context surrounding each block. (So if we're not using context, every # hunk will contain one block.) package GT::FileMan::Diff::Hunk; sub new { # Arg1 is output from &LCS::diff (which corresponds to one Block) # Arg2 is the number of items (lines, e.g.,) of context around each block # # This subroutine changes $File_Length_Difference # # Fields in a Hunk: # blocks - a list of Block objects # start - index in file 1 where first block of the hunk starts # end - index in file 1 where last block of the hunk ends # # Variables: # before_diff - how much longer file 2 is than file 1 due to all hunks # until but NOT including this one # after_diff - difference due to all hunks including this one my ($class, $f1, $f2, $piece, $context_items) = @_; my $block = new GT::FileMan::Diff::Block ($piece); # this modifies $FLD! my $before_diff = $File_Length_Difference; # BEFORE this hunk my $after_diff = $before_diff + $block->{"length_diff"}; $File_Length_Difference += $block->{"length_diff"}; # @remove_array and @insert_array hold the items to insert and remove # Save the start & beginning of each array. If the array doesn't exist # though (e.g., we're only adding items in this block), then figure # out the line number based on the line number of the other file and # the current difference in file lenghts my @remove_array = $block->remove; my @insert_array = $block->insert; my ($a1, $a2, $b1, $b2, $start1, $start2, $end1, $end2); $a1 = @remove_array ? $remove_array[0 ]->{"item_no"} : -1; $a2 = @remove_array ? $remove_array[-1]->{"item_no"} : -1; $b1 = @insert_array ? $insert_array[0 ]->{"item_no"} : -1; $b2 = @insert_array ? $insert_array[-1]->{"item_no"} : -1; $start1 = $a1 == -1 ? $b1 - $before_diff : $a1; $end1 = $a2 == -1 ? $b2 - $after_diff : $a2; $start2 = $b1 == -1 ? $a1 + $before_diff : $b1; $end2 = $b2 == -1 ? $a2 + $after_diff : $b2; # At first, a hunk will have just one Block in it my $hunk = { "start1" => $start1, "start2" => $start2, "end1" => $end1, "end2" => $end2, "blocks" => [$block], "f1" => $f1, "f2" => $f2 }; bless $hunk, $class; $hunk->flag_context($context_items); return $hunk; } # Change the "start" and "end" fields to note that context should be added # to this hunk sub flag_context { my ($hunk, $context_items) = @_; return unless $context_items; # no context # add context before my $start1 = $hunk->{"start1"}; my $num_added = $context_items > $start1 ? $start1 : $context_items; $hunk->{"start1"} -= $num_added; $hunk->{"start2"} -= $num_added; # context after my $end1 = $hunk->{"end1"}; $num_added = ($end1+$context_items > $#{$hunk->{f1}}) ? $#{$hunk->{f1}} - $end1 : $context_items; $hunk->{"end1"} += $num_added; $hunk->{"end2"} += $num_added; } # Is there an overlap between hunk arg0 and old hunk arg1? # Note: if end of old hunk is one less than beginning of second, they overlap sub does_overlap { my ($hunk, $oldhunk) = @_; return "" unless $oldhunk; # first time through, $oldhunk is empty # Do I actually need to test both? return ($hunk->{"start1"} - $oldhunk->{"end1"} <= 1 || $hunk->{"start2"} - $oldhunk->{"end2"} <= 1); } # Prepend hunk arg1 to hunk arg0 # Note that arg1 isn't updated! Only arg0 is. sub prepend_hunk { my ($hunk, $oldhunk) = @_; $hunk->{"start1"} = $oldhunk->{"start1"}; $hunk->{"start2"} = $oldhunk->{"start2"}; unshift (@{$hunk->{"blocks"}}, @{$oldhunk->{"blocks"}}); } # DIFF OUTPUT ROUTINES. THESE ROUTINES CONTAIN DIFF FORMATTING INFO... sub output_diff { my $context_diff = $_[3]; if ($context_diff) { return &output_unified_diff } else { return &output_boring_diff } } sub output_unified_diff { my ($hunk, $fileref1, $fileref2) = @_; my @blocklist; my $ret = ""; # Calculate item number range. my $range1 = $hunk->unified_range(1); my $range2 = $hunk->unified_range(2); $ret .= "@@ -$range1 +$range2 @@\n"; # Outlist starts containing the hunk of file 1. # Removing an item just means putting a '-' in front of it. # Inserting an item requires getting it from file2 and splicing it in. # We splice in $num_added items. Remove blocks use $num_added because # splicing changed the length of outlist. # We remove $num_removed items. Insert blocks use $num_removed because # their item numbers---corresponding to positions in file *2*--- don't take # removed items into account. my $low = $hunk->{"start1"}; my $hi = $hunk->{"end1"}; my ($num_added, $num_removed) = (0,0); my @outlist = @$fileref1[$low..$hi]; for (@outlist) { s/^/ / } # assume it's just context foreach my $block (@{$hunk->{"blocks"}}) { foreach my $item ($block->remove) { my $op = $item->{"sign"}; # - my $offset = $item->{"item_no"} - $low + $num_added; $outlist[$offset] =~ s/^ /$op/; $num_removed++; } foreach my $item ($block->insert) { my $op = $item->{"sign"}; # + my $i = $item->{"item_no"}; my $offset = $i - $hunk->{"start2"} + $num_removed; splice(@outlist,$offset,0,"$op$$fileref2[$i]"); $num_added++; } } for (@outlist) { $ret .= "$_\n" } # add \n's $ret; } sub output_boring_diff { # Note that an old diff can't have any context. Therefore, we know that # there's only one block in the hunk. my ($hunk, $fileref1, $fileref2) = @_; my %op_hash = ('+' => 'a', '-' => 'd', '!' => 'c'); my $ret = ''; my @blocklist = @{$hunk->{"blocks"}}; warn ("Expecting one block in an old diff hunk!") if scalar @blocklist != 1; my $block = $blocklist[0]; my $op = $block->op; # +, -, or ! # Calculate item number range. # old diff range is just like a context diff range, except the ranges # are on one line with the action between them. my $range1 = $hunk->context_range(1); my $range2 = $hunk->context_range(2); my $action = $op_hash{$op} || warn "unknown op $op"; $ret .= "$range1$action$range2\n"; # If removing anything, just print out all the remove lines in the hunk # which is just all the remove lines in the block if (my @foo = $block->remove) { my @outlist = @$fileref1[$hunk->{"start1"}..$hunk->{"end1"}]; map {$_ = "< $_\n"} @outlist; # all lines will be '< text\n' $ret .= join '', @outlist; } $ret .= "---\n" if $op eq '!'; # only if inserting and removing if ($block->insert) { my @outlist = @$fileref2[$hunk->{"start2"}..$hunk->{"end2"}]; map {$_ = "> $_\n"} @outlist; # all lines will be '> text\n' $ret .= join "", @outlist; } } sub context_range { # Generate a range of item numbers to print. Only print 1 number if the range # has only one item in it. Otherwise, it's 'start,end' my ($hunk, $flag) = @_; my ($start, $end) = ($hunk->{"start$flag"},$hunk->{"end$flag"}); $start++; $end++; # index from 1, not zero my $range = ($start < $end) ? "$start,$end" : $end; return $range; } sub unified_range { # Generate a range of item numbers to print for unified diff # Print number where block starts, followed by number of lines in the block # (don't print number of lines if it's 1) my ($hunk, $flag) = @_; my ($start, $end) = ($hunk->{"start$flag"},$hunk->{"end$flag"}); $start++; $end++; # index from 1, not zero my $length = $end - $start + 1; my $first = $length < 2 ? $end : $start; # strange, but correct... my $range = $length== 1 ? $first : "$first,$length"; return $range; } package GT::FileMan::Diff::Block; # Package Block. A block is an operation removing, adding, or changing # a group of items. Basically, this is just a list of changes, where each # change adds or deletes a single item. # (Change could be a separate class, but it didn't seem worth it) sub new { # Input is a chunk from &Algorithm::LCS::diff # Fields in a block: # length_diff - how much longer file 2 is than file 1 due to this block # Each change has: # sign - '+' for insert, '-' for remove # item_no - number of the item in the file (e.g., line number) # We don't bother storing the text of the item # my ($class,$chunk) = @_; my @changes = (); # This just turns each change into a hash. foreach my $item (@$chunk) { my ($sign, $item_no, $text) = @$item; my $hashref = {"sign" => $sign, "item_no" => $item_no}; push @changes, $hashref; } my $block = { "changes" => \@changes }; bless $block, $class; $block->{"length_diff"} = $block->insert - $block->remove; return $block; } # LOW LEVEL FUNCTIONS sub op { # what kind of block is this? my $block = shift; my $insert = $block->insert; my $remove = $block->remove; $remove && $insert and return '!'; $remove and return '-'; $insert and return '+'; warn "unknown block type"; return '^'; # context block } # Returns a list of the changes in this block that remove items # (or the number of removals if called in scalar context) sub remove { return grep {$_->{"sign"} eq '-'} @{shift->{"changes"}}; } # Returns a list of the changes in this block that insert items sub insert { return grep {$_->{"sign"} eq '+'} @{shift->{"changes"}}; } 1;