668 lines
22 KiB
HTML
668 lines
22 KiB
HTML
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||
|
<html xmlns="http://www.w3.org/1999/xhtml">
|
||
|
<head>
|
||
|
<title>Algorithm::Diff - Compute `intelligent' differences between two files / lists</title>
|
||
|
<link rev="made" href="mailto:root@penguin.office.gossamer-threads.com" />
|
||
|
|
||
|
<style type="text/css">
|
||
|
/* $MVD$:fontset("Untitled Font Set 1","ARIEL","HELVETICA","HELV","SANSERIF") */
|
||
|
/* $MVD$:fontset("Arial","Arial") */
|
||
|
/* $MVD$:fontset("Arial Black","Arial Black") */
|
||
|
/* $MVD$:fontset("Algerian","Algerian") */
|
||
|
|
||
|
|
||
|
body {
|
||
|
background-color: white;
|
||
|
font-family: Verdana, Arial, sans-serif;
|
||
|
font-size: small;
|
||
|
color: black;
|
||
|
}
|
||
|
|
||
|
|
||
|
p {
|
||
|
background-color : white;
|
||
|
font-family : Verdana, Arial, sans-serif;
|
||
|
font-size : small;
|
||
|
color : black;
|
||
|
}
|
||
|
|
||
|
|
||
|
h1 {
|
||
|
font-family : Verdana, Arial, sans-serif;
|
||
|
font-weight : bold;
|
||
|
font-size : medium;
|
||
|
background-color : white;
|
||
|
color : maroon;
|
||
|
}
|
||
|
|
||
|
|
||
|
h2 {
|
||
|
font-family : Verdana, Arial, sans-serif;
|
||
|
font-size : medium;
|
||
|
font-weight : bold;
|
||
|
color : blue;
|
||
|
background-color : white;
|
||
|
}
|
||
|
|
||
|
|
||
|
h3 {
|
||
|
font-family : Verdana, Arial, sans-serif;
|
||
|
font-weight : bold;
|
||
|
font-size : medium;
|
||
|
color : black;
|
||
|
background-color : white;
|
||
|
}
|
||
|
|
||
|
|
||
|
h4 {
|
||
|
font-family : Verdana, Arial, sans-serif;
|
||
|
font-weight : bold;
|
||
|
font-size : small;
|
||
|
color : maroon;
|
||
|
background-color : white;
|
||
|
}
|
||
|
|
||
|
|
||
|
h5 {
|
||
|
font-family : Verdana, Arial, sans-serif;
|
||
|
font-weight : bold;
|
||
|
font-size : small;
|
||
|
color : blue;
|
||
|
background-color : white;
|
||
|
}
|
||
|
|
||
|
|
||
|
h6 {
|
||
|
font-family : Verdana, Arial, sans-serif;
|
||
|
font-weight : bold;
|
||
|
font-size : small;
|
||
|
color : black;
|
||
|
background-color : white;
|
||
|
}
|
||
|
|
||
|
|
||
|
ul {
|
||
|
font-family : Verdana, Arial, sans-serif;
|
||
|
font-size : small;
|
||
|
color : black;
|
||
|
}
|
||
|
|
||
|
|
||
|
ol {
|
||
|
font-family : Verdana, Arial, sans-serif;
|
||
|
font-size : small;
|
||
|
color : black;
|
||
|
}
|
||
|
|
||
|
|
||
|
dl {
|
||
|
font-family : Verdana, Arial, sans-serif;
|
||
|
font-size : small;
|
||
|
color : black;
|
||
|
}
|
||
|
|
||
|
|
||
|
li {
|
||
|
font-family : Verdana, Arial, sans-serif;
|
||
|
font-size : small;
|
||
|
color : black;
|
||
|
}
|
||
|
|
||
|
|
||
|
th {
|
||
|
font-family : Verdana, Arial, sans-serif;
|
||
|
font-size : small;
|
||
|
color : black;
|
||
|
}
|
||
|
|
||
|
|
||
|
td {
|
||
|
font-family : Verdana, Arial, sans-serif;
|
||
|
font-size : small;
|
||
|
color : black;
|
||
|
}
|
||
|
|
||
|
|
||
|
dl {
|
||
|
font-family : Verdana, Arial, sans-serif;
|
||
|
font-size : small;
|
||
|
color : black;
|
||
|
}
|
||
|
|
||
|
|
||
|
dd {
|
||
|
font-family : Verdana, Arial, sans-serif;
|
||
|
font-size : small;
|
||
|
color : black;
|
||
|
}
|
||
|
|
||
|
|
||
|
dt {
|
||
|
font-family : Verdana, Arial, sans-serif;
|
||
|
font-size : small;
|
||
|
color : black;
|
||
|
}
|
||
|
|
||
|
|
||
|
code {
|
||
|
font-family : Courier;
|
||
|
font-size : small;
|
||
|
color : black;
|
||
|
}
|
||
|
|
||
|
|
||
|
pre {
|
||
|
font-family : Courier;
|
||
|
font-size : small;
|
||
|
color : black;
|
||
|
}
|
||
|
|
||
|
.mvd-H1 {
|
||
|
font-family : Verdana, Arial, sans-serif;
|
||
|
font-weight : bold;
|
||
|
font-size : 14.0pt;
|
||
|
background-color : transparent;
|
||
|
background-image : none;
|
||
|
color : maroon;
|
||
|
}
|
||
|
|
||
|
|
||
|
.mvd-H2 {
|
||
|
font-family : Verdana, Arial, sans-serif;
|
||
|
font-size : 12.0pt;
|
||
|
color : blue;
|
||
|
}
|
||
|
|
||
|
|
||
|
p.indent {
|
||
|
font-family : Verdana, Arial, sans-serif;
|
||
|
list-style-type : circle;
|
||
|
list-style-position : inside;
|
||
|
color : black;
|
||
|
margin-left : 16.0pt;
|
||
|
}
|
||
|
|
||
|
|
||
|
.mvd-P-indent {
|
||
|
font-family : Verdana, Arial, sans-serif;
|
||
|
list-style-type : circle;
|
||
|
list-style-position : inside;
|
||
|
color : black;
|
||
|
margin-left : 16.0pt;
|
||
|
}
|
||
|
|
||
|
|
||
|
pre.programlisting {
|
||
|
font-size : 9.0pt;
|
||
|
list-style-type : disc;
|
||
|
margin-left : 16.0pt;
|
||
|
margin-top : -14.0pt;
|
||
|
}
|
||
|
|
||
|
|
||
|
.mvd-PRE-programlisting {
|
||
|
font-size : 9.0pt;
|
||
|
list-style-type : disc;
|
||
|
margin-left : 16.0pt;
|
||
|
margin-top : -14.0pt;
|
||
|
}
|
||
|
|
||
|
|
||
|
.mvd-PRE {
|
||
|
font-size : 9.0pt;
|
||
|
}
|
||
|
|
||
|
|
||
|
p.note {
|
||
|
margin-left : 28.0pt;
|
||
|
}
|
||
|
|
||
|
|
||
|
.mvd-P-note {
|
||
|
margin-left : 28.0pt;
|
||
|
}
|
||
|
|
||
|
|
||
|
.mvd-H4 {
|
||
|
font-family : Verdana, Arial, sans-serif;
|
||
|
font-weight : normal;
|
||
|
font-size : 9.0pt;
|
||
|
color : black;
|
||
|
margin-left : 6.0pt;
|
||
|
margin-top : -14.0pt;
|
||
|
}
|
||
|
|
||
|
|
||
|
.mvd-P {
|
||
|
font-family : Verdana, Arial, sans-serif;
|
||
|
font-size : 10.0pt;
|
||
|
color : black;
|
||
|
}
|
||
|
|
||
|
.mvd-BODY {
|
||
|
font-family : Verdana, Arial, sans-serif;
|
||
|
background-color : white;
|
||
|
}
|
||
|
|
||
|
|
||
|
p.indentnobullet {
|
||
|
font-family : Verdana, Arial, sans-serif;
|
||
|
list-style-type : none;
|
||
|
}
|
||
|
|
||
|
|
||
|
.mvd-P-indentnobullet {
|
||
|
font-family : Verdana, Arial, sans-serif;
|
||
|
list-style-type : none;
|
||
|
}
|
||
|
</style>
|
||
|
|
||
|
|
||
|
</head>
|
||
|
|
||
|
<body style="background-color: white">
|
||
|
|
||
|
<p><a name="__index__"></a></p>
|
||
|
<!-- INDEX BEGIN -->
|
||
|
|
||
|
<ul>
|
||
|
|
||
|
<li><a href="#name">NAME</a></li>
|
||
|
<li><a href="#synopsis">SYNOPSIS</a></li>
|
||
|
<li><a href="#introduction">INTRODUCTION</a></li>
|
||
|
<li><a href="#usage">USAGE</a>
<ul>
<li><a href="#lcs"><code>LCS</code></a></li>
<li><a href="#diff"><code>diff</code></a></li>
<li><a href="#sdiff"><code>sdiff</code></a></li>
<li><a href="#traverse_sequences"><code>traverse_sequences</code></a></li>
<li><a href="#traverse_balanced"><code>traverse_balanced</code></a></li>
</ul>
</li>
|
||
|
|
||
|
<li><a href="#key_generation_functions">KEY GENERATION FUNCTIONS</a></li>
|
||
|
<li><a href="#author">AUTHOR</a></li>
|
||
|
<li><a href="#license">LICENSE</a></li>
|
||
|
<li><a href="#credits">CREDITS</a></li>
|
||
|
</ul>
|
||
|
<!-- INDEX END -->
|
||
|
|
||
|
<hr />
|
||
|
<p>
|
||
|
</p>
|
||
|
<h1><a name="name">NAME</a></h1>
|
||
|
<p>Algorithm::Diff - Compute `intelligent' differences between two files / lists</p>
|
||
|
<p>
|
||
|
</p>
|
||
|
<hr />
|
||
|
<h1><a name="synopsis">SYNOPSIS</a></h1>
|
||
|
<pre>
|
||
|
use GT::File::Diff qw(diff sdiff LCS traverse_sequences
|
||
|
traverse_balanced);</pre>
|
||
|
<pre>
|
||
|
@lcs = LCS( \@seq1, \@seq2 );</pre>
|
||
|
<pre>
|
||
|
@lcs = LCS( \@seq1, \@seq2, $key_generation_function );</pre>
|
||
|
<pre>
|
||
|
$lcsref = LCS( \@seq1, \@seq2 );</pre>
|
||
|
<pre>
|
||
|
$lcsref = LCS( \@seq1, \@seq2, $key_generation_function );</pre>
|
||
|
<pre>
|
||
|
@diffs = diff( \@seq1, \@seq2 );</pre>
|
||
|
<pre>
|
||
|
@diffs = diff( \@seq1, \@seq2, $key_generation_function );</pre>
|
||
|
<pre>
|
||
|
@sdiffs = sdiff( \@seq1, \@seq2 );</pre>
|
||
|
<pre>
|
||
|
@sdiffs = sdiff( \@seq1, \@seq2, $key_generation_function );
|
||
|
|
||
|
traverse_sequences( \@seq1, \@seq2,
|
||
|
{ MATCH => $callback,
|
||
|
DISCARD_A => $callback,
|
||
|
DISCARD_B => $callback,
|
||
|
} );</pre>
|
||
|
<pre>
|
||
|
traverse_sequences( \@seq1, \@seq2,
|
||
|
{ MATCH => $callback,
|
||
|
DISCARD_A => $callback,
|
||
|
DISCARD_B => $callback,
|
||
|
},
|
||
|
$key_generation_function );</pre>
|
||
|
<pre>
|
||
|
traverse_balanced( \@seq1, \@seq2,
|
||
|
{ MATCH => $callback,
|
||
|
DISCARD_A => $callback,
|
||
|
DISCARD_B => $callback,
|
||
|
CHANGE => $callback,
|
||
|
} );</pre>
|
||
|
<p>
|
||
|
</p>
|
||
|
<hr />
|
||
|
<h1><a name="introduction">INTRODUCTION</a></h1>
|
||
|
<p>(by Mark-Jason Dominus)</p>
|
||
|
<p>I once read an article written by the authors of <code>diff</code>; they said
|
||
|
that they worked very hard on the algorithm until they found the
|
||
|
right one.</p>
|
||
|
<p>I think what they ended up using (and I hope someone will correct me,
|
||
|
because I am not very confident about this) was the `longest common
|
||
|
subsequence' method. In the LCS problem, you have two sequences of
|
||
|
items:</p>
|
||
|
<pre>
|
||
|
a b c d f g h j q z</pre>
|
||
|
<pre>
|
||
|
a b c d e f g i j k r x y z</pre>
|
||
|
<p>and you want to find the longest sequence of items that is present in
|
||
|
both original sequences in the same order. That is, you want to find
|
||
|
a new sequence <em>S</em> which can be obtained from the first sequence by
|
||
|
deleting some items, and from the second sequence by deleting other
|
||
|
items. You also want <em>S</em> to be as long as possible. In this case
|
||
|
<em>S</em> is</p>
|
||
|
<pre>
|
||
|
a b c d f g j z</pre>
|
||
|
<p>From there it's only a small step to get diff-like output:</p>
|
||
|
<pre>
|
||
|
e h i k q r x y
|
||
|
+ - + + - + + +</pre>
|
||
|
<p>This module solves the LCS problem. It also includes a canned
|
||
|
function to generate <code>diff</code>-like output.</p>
|
||
|
<p>It might seem from the example above that the LCS of two sequences is
|
||
|
always pretty obvious, but that's not always the case, especially when
|
||
|
the two sequences have many repeated elements. For example, consider</p>
|
||
|
<pre>
|
||
|
a x b y c z p d q
|
||
|
a b c a x b y c z</pre>
|
||
|
<p>A naive approach might start by matching up the <code>a</code> and <code>b</code> that
|
||
|
appear at the beginning of each sequence, like this:</p>
|
||
|
<pre>
|
||
|
a x b y c z p d q
|
||
|
a b c a x b y c z</pre>
|
||
|
<p>This finds the common subsequence <code>a b c z</code>. But actually, the LCS
|
||
|
is <code>a x b y c z</code>:</p>
|
||
|
<pre>
|
||
|
a x b y c z p d q
|
||
|
a b c a x b y c z</pre>
|
||
|
<p>
|
||
|
</p>
|
||
|
<hr />
|
||
|
<h1><a name="usage">USAGE</a></h1>
|
||
|
<p>This module provides five exportable functions, which we'll deal with in
|
||
|
ascending order of difficulty: <code>LCS</code>,
|
||
|
<code>diff</code>, <code>sdiff</code>, <code>traverse_sequences</code>, and <code>traverse_balanced</code>.</p>
|
||
|
<p>
|
||
|
</p>
|
||
|
<h2><a name="lcs"><code>LCS</code></a></h2>
|
||
|
<p>Given references to two lists of items, LCS returns an array containing their
|
||
|
longest common subsequence. In scalar context, it returns a reference to
|
||
|
such a list.</p>
|
||
|
<pre>
|
||
|
@lcs = LCS( \@seq1, \@seq2 );
|
||
|
$lcsref = LCS( \@seq1, \@seq2 );</pre>
|
||
|
<p><code>LCS</code> may be passed an optional third parameter; this is a CODE
|
||
|
reference to a key generation function. See <a href="#key_generation_functions">KEY GENERATION FUNCTIONS</a>.</p>
|
||
|
<pre>
|
||
|
@lcs = LCS( \@seq1, \@seq2, $keyGen );
|
||
|
$lcsref = LCS( \@seq1, \@seq2, $keyGen );</pre>
|
||
|
<p>Additional parameters, if any, will be passed to the key generation
|
||
|
routine.</p>
|
||
|
<p>
|
||
|
</p>
|
||
|
<h2><a name="diff"><code>diff</code></a></h2>
|
||
|
<pre>
|
||
|
@diffs = diff( \@seq1, \@seq2 );
|
||
|
$diffs_ref = diff( \@seq1, \@seq2 );</pre>
|
||
|
<p><code>diff</code> computes the smallest set of additions and deletions necessary
|
||
|
to turn the first sequence into the second, and returns a description
|
||
|
of these changes. The description is a list of <em>hunks</em>; each hunk
|
||
|
represents a contiguous section of items which should be added,
|
||
|
deleted, or replaced. The return value of <code>diff</code> is a list of
|
||
|
hunks, or, in scalar context, a reference to such a list.</p>
|
||
|
<p>Here is an example: The diff of the following two sequences:</p>
|
||
|
<pre>
|
||
|
a b c e h j l m n p
|
||
|
b c d e f j k l m r s t</pre>
|
||
|
<p>Result:</p>
|
||
|
<pre>
|
||
|
[
|
||
|
[ [ '-', 0, 'a' ] ],</pre>
|
||
|
<pre>
|
||
|
[ [ '+', 2, 'd' ] ],</pre>
|
||
|
<pre>
|
||
|
[ [ '-', 4, 'h' ] ,
|
||
|
[ '+', 4, 'f' ] ],</pre>
|
||
|
<pre>
|
||
|
[ [ '+', 6, 'k' ] ],</pre>
|
||
|
<pre>
|
||
|
[ [ '-', 8, 'n' ],
|
||
|
[ '-', 9, 'p' ],
|
||
|
[ '+', 9, 'r' ],
|
||
|
[ '+', 10, 's' ],
|
||
|
[ '+', 11, 't' ],
|
||
|
]
|
||
|
]</pre>
|
||
|
<p>There are five hunks here. The first hunk says that the <code>a</code> at
|
||
|
position 0 of the first sequence should be deleted (<code>-</code>). The second
|
||
|
hunk says that the <code>d</code> at position 2 of the second sequence should
|
||
|
be inserted (<code>+</code>). The third hunk says that the <code>h</code> at position 4
|
||
|
of the first sequence should be removed and replaced with the <code>f</code>
|
||
|
from position 4 of the second sequence. The other two hunks are described similarly.</p>
|
||
|
<p><code>diff</code> may be passed an optional third parameter; this is a CODE
|
||
|
reference to a key generation function. See <a href="#key_generation_functions">KEY GENERATION FUNCTIONS</a>.</p>
|
||
|
<p>Additional parameters, if any, will be passed to the key generation
|
||
|
routine.</p>
|
||
|
<p>
|
||
|
</p>
|
||
|
<h2><a name="sdiff"><code>sdiff</code></a></h2>
|
||
|
<pre>
|
||
|
@sdiffs = sdiff( \@seq1, \@seq2 );
|
||
|
$sdiffs_ref = sdiff( \@seq1, \@seq2 );</pre>
|
||
|
<p><code>sdiff</code> computes all necessary components to show two sequences
|
||
|
and their minimized differences side by side, just like the
|
||
|
Unix-utility <em>sdiff</em> does:</p>
|
||
|
<pre>
|
||
|
same same
|
||
|
before | after
|
||
|
old &lt; -
|
||
|
- > new</pre>
|
||
|
<p>It returns a list of array refs, each pointing to an array of
|
||
|
display instructions. In scalar context it returns a reference
|
||
|
to such a list.</p>
|
||
|
<p>Display instructions consist of three elements: A modifier indicator
|
||
|
(<code>+</code>: Element added, <code>-</code>: Element removed, <code>u</code>: Element unmodified,
|
||
|
<code>c</code>: Element changed) and the value of the old and new elements, to
|
||
|
be displayed side by side.</p>
|
||
|
<p>An <code>sdiff</code> of the following two sequences:</p>
|
||
|
<pre>
|
||
|
a b c e h j l m n p
|
||
|
b c d e f j k l m r s t</pre>
|
||
|
<p>results in</p>
|
||
|
<pre>
[ [ '-', 'a', '' ],
[ 'u', 'b', 'b' ],
[ 'u', 'c', 'c' ],
[ '+', '', 'd' ],
[ 'u', 'e', 'e' ],
[ 'c', 'h', 'f' ],
[ 'u', 'j', 'j' ],
[ '+', '', 'k' ],
[ 'u', 'l', 'l' ],
[ 'u', 'm', 'm' ],
[ 'c', 'n', 'r' ],
[ 'c', 'p', 's' ],
[ '+', '', 't' ] ]</pre>
|
||
|
<p><code>sdiff</code> may be passed an optional third parameter; this is a CODE
|
||
|
reference to a key generation function. See <a href="#key_generation_functions">KEY GENERATION FUNCTIONS</a>.</p>
|
||
|
<p>Additional parameters, if any, will be passed to the key generation
|
||
|
routine.</p>
|
||
|
<p>
|
||
|
</p>
|
||
|
<h2><a name="traverse_sequences"><code>traverse_sequences</code></a></h2>
|
||
|
<p><code>traverse_sequences</code> is the most general facility provided by this
|
||
|
module; <code>diff</code> and <code>LCS</code> are implemented as calls to it.</p>
|
||
|
<p>Imagine that there are two arrows. Arrow A points to an element of sequence A,
|
||
|
and arrow B points to an element of the sequence B. Initially, the arrows
|
||
|
point to the first elements of the respective sequences. <code>traverse_sequences</code>
|
||
|
will advance the arrows through the sequences one element at a time, calling an
|
||
|
appropriate user-specified callback function before each advance. It
|
||
|
will advance the arrows in such a way that if there are elements <code>$A[$i]</code>
|
||
|
and <code>$B[$j]</code> which are equal and which are part of the LCS, there will be
|
||
|
some moment during the execution of <code>traverse_sequences</code> when arrow A is
|
||
|
pointing to <code>$A[$i]</code> and arrow B is pointing to <code>$B[$j]</code>. When this happens,
|
||
|
<code>traverse_sequences</code> will call the <code>MATCH</code> callback function and then it will
|
||
|
advance both arrows.</p>
|
||
|
<p>Otherwise, one of the arrows is pointing to an element of its sequence that is
|
||
|
not part of the LCS. <code>traverse_sequences</code> will advance that arrow and will
|
||
|
call the <code>DISCARD_A</code> or the <code>DISCARD_B</code> callback, depending on which arrow it
|
||
|
advanced. If both arrows point to elements that are not part of the LCS, then
|
||
|
<code>traverse_sequences</code> will advance one of them and call the appropriate
|
||
|
callback, but it is not specified which it will call.</p>
|
||
|
<p>The arguments to <code>traverse_sequences</code> are the two sequences to traverse, and a
|
||
|
hash which specifies the callback functions, like this:</p>
|
||
|
<pre>
|
||
|
traverse_sequences( \@seq1, \@seq2,
|
||
|
{ MATCH => $callback_1,
|
||
|
DISCARD_A => $callback_2,
|
||
|
DISCARD_B => $callback_3,
|
||
|
} );</pre>
|
||
|
<p>Callbacks for MATCH, DISCARD_A, and DISCARD_B are invoked with at least the
|
||
|
indices of the two arrows as their arguments. They are not expected to return
|
||
|
any values. If a callback is omitted from the table, it is not called.</p>
|
||
|
<p>Callbacks for A_FINISHED and B_FINISHED are invoked with at least the
|
||
|
corresponding index in A or B.</p>
|
||
|
<p>If arrow A reaches the end of its sequence, before arrow B does,
|
||
|
<code>traverse_sequences</code> will call the <code>A_FINISHED</code> callback when it advances
|
||
|
arrow B, if there is such a function; if not it will call <code>DISCARD_B</code> instead.
|
||
|
Similarly if arrow B finishes first. <code>traverse_sequences</code> returns when both
|
||
|
arrows are at the ends of their respective sequences. It returns true on
|
||
|
success and false on failure. At present there is no way to fail.</p>
|
||
|
<p><code>traverse_sequences</code> may be passed an optional fourth parameter; this is a
|
||
|
CODE reference to a key generation function. See <a href="#key_generation_functions">KEY GENERATION FUNCTIONS</a>.</p>
|
||
|
<p>Additional parameters, if any, will be passed to the key generation function.</p>
|
||
|
<p>
|
||
|
</p>
|
||
|
<h2><a name="traverse_balanced"><code>traverse_balanced</code></a></h2>
|
||
|
<p><code>traverse_balanced</code> is an alternative to <code>traverse_sequences</code>. It
|
||
|
uses a different algorithm to iterate through the entries in the
|
||
|
computed LCS. Instead of sticking to one side and showing element changes
|
||
|
as insertions and deletions only, it will jump back and forth between
|
||
|
the two sequences and report <em>changes</em> occurring as deletions on one
|
||
|
side followed immediately by an insertion on the other side.</p>
|
||
|
<p>In addition to the
|
||
|
<code>DISCARD_A</code>,
|
||
|
<code>DISCARD_B</code>, and
|
||
|
<code>MATCH</code>
|
||
|
callbacks supported by <code>traverse_sequences</code>, <code>traverse_balanced</code> supports
|
||
|
a <code>CHANGE</code> callback indicating that one element got <code>replaced</code> by another:</p>
|
||
|
<pre>
|
||
|
traverse_balanced( \@seq1, \@seq2,
|
||
|
{ MATCH => $callback_1,
|
||
|
DISCARD_A => $callback_2,
|
||
|
DISCARD_B => $callback_3,
|
||
|
CHANGE => $callback_4,
|
||
|
} );</pre>
|
||
|
<p>If no <code>CHANGE</code> callback is specified, <code>traverse_balanced</code>
|
||
|
will map <code>CHANGE</code> events to <code>DISCARD_A</code> and <code>DISCARD_B</code> actions,
|
||
|
therefore resulting in behaviour similar to that of <code>traverse_sequences</code>
|
||
|
with different order of events.</p>
|
||
|
<p><code>traverse_balanced</code> might be a bit slower than <code>traverse_sequences</code>,
|
||
|
noticeable only while processing huge amounts of data.</p>
|
||
|
<p>The <code>sdiff</code> function of this module
|
||
|
is implemented as a call to <code>traverse_balanced</code>.</p>
|
||
|
<p>
|
||
|
</p>
|
||
|
<hr />
|
||
|
<h1><a name="key_generation_functions">KEY GENERATION FUNCTIONS</a></h1>
|
||
|
<p><code>diff</code>, <code>sdiff</code>, <code>LCS</code>, and <code>traverse_sequences</code> accept an optional last parameter.
|
||
|
This is a CODE reference to a key generating (hashing) function that should
|
||
|
return a string that uniquely identifies a given element. It should be the
|
||
|
case that if two elements are to be considered equal, their keys should be the
|
||
|
same (and the other way around). If no key generation function is provided,
|
||
|
the key will be the element as a string.</p>
|
||
|
<p>By default, comparisons will use <code>eq</code> and elements will be turned into keys
|
||
|
using the default stringizing operator '""'.</p>
|
||
|
<p>Where this is important is when you're comparing something other than strings.
|
||
|
If it is the case that you have multiple different objects that should be
|
||
|
considered to be equal, you should supply a key generation function. Otherwise,
|
||
|
you have to make sure that your arrays contain unique references.</p>
|
||
|
<p>For instance, consider this example:</p>
|
||
|
<pre>
|
||
|
package Person;</pre>
|
||
|
<pre>
|
||
|
sub new
|
||
|
{
|
||
|
my $package = shift;
|
||
|
return bless { name => '', ssn => '', @_ }, $package;
|
||
|
}</pre>
|
||
|
<pre>
|
||
|
sub clone
|
||
|
{
|
||
|
my $old = shift;
|
||
|
my $new = bless { %$old }, ref($old);
|
||
|
}</pre>
|
||
|
<pre>
|
||
|
sub hash
|
||
|
{
|
||
|
return shift()->{'ssn'};
|
||
|
}</pre>
|
||
|
<pre>
|
||
|
my $person1 = Person->new( name => 'Joe', ssn => '123-45-6789' );
|
||
|
my $person2 = Person->new( name => 'Mary', ssn => '123-47-0000' );
|
||
|
my $person3 = Person->new( name => 'Pete', ssn => '999-45-2222' );
|
||
|
my $person4 = Person->new( name => 'Peggy', ssn => '123-45-9999' );
|
||
|
my $person5 = Person->new( name => 'Frank', ssn => '000-45-9999' );</pre>
|
||
|
<p>If you did this:</p>
|
||
|
<pre>
|
||
|
my $array1 = [ $person1, $person2, $person4 ];
|
||
|
my $array2 = [ $person1, $person3, $person4, $person5 ];
|
||
|
GT::File::Diff::diff( $array1, $array2 );</pre>
|
||
|
<p>everything would work out OK (each of the objects would be converted
|
||
|
into a string like "Person=HASH(0x82425b0)" for comparison).</p>
|
||
|
<p>But if you did this:</p>
|
||
|
<pre>
|
||
|
my $array1 = [ $person1, $person2, $person4 ];
|
||
|
my $array2 = [ $person1, $person3, $person4->clone(), $person5 ];
|
||
|
GT::File::Diff::diff( $array1, $array2 );</pre>
|
||
|
<p><code>$person4</code> and <code>$person4-&gt;clone()</code> (which have the same name and SSN)
|
||
|
would be seen as different objects. If you wanted them to be considered
|
||
|
equivalent, you would have to pass in a key generation function:</p>
|
||
|
<pre>
|
||
|
my $array1 = [ $person1, $person2, $person4 ];
|
||
|
my $array2 = [ $person1, $person3, $person4->clone(), $person5 ];
|
||
|
GT::File::Diff::diff( $array1, $array2, \&Person::hash );</pre>
|
||
|
<p>This would use the 'ssn' field in each Person as a comparison key, and
|
||
|
so would consider <code>$person4</code> and <code>$person4-&gt;clone()</code> as equal.</p>
|
||
|
<p>You may also pass additional parameters to the key generation function
|
||
|
if you wish.</p>
|
||
|
<p>
|
||
|
</p>
|
||
|
<hr />
|
||
|
<h1><a name="author">AUTHOR</a></h1>
|
||
|
<p>This version by Ned Konz, <a href="mailto:perl@bike-nomad.com">perl@bike-nomad.com</a></p>
|
||
|
<p>
|
||
|
</p>
|
||
|
<hr />
|
||
|
<h1><a name="license">LICENSE</a></h1>
|
||
|
<p>Copyright (c) 2000-2002 Ned Konz. All rights reserved.
|
||
|
This program is free software;
|
||
|
you can redistribute it and/or modify it under the same terms
|
||
|
as Perl itself.</p>
|
||
|
<p>
|
||
|
</p>
|
||
|
<hr />
|
||
|
<h1><a name="credits">CREDITS</a></h1>
|
||
|
<p>Versions through 0.59 (and much of this documentation) were written by:</p>
|
||
|
<p>Mark-Jason Dominus, <a href="mailto:mjd-perl-diff@plover.com">mjd-perl-diff@plover.com</a></p>
|
||
|
<p>This version borrows the documentation and names of the routines
|
||
|
from Mark-Jason's, but has all new code in Diff.pm.</p>
|
||
|
<p>This code was adapted from the Smalltalk code of
|
||
|
Mario Wolczko &lt;<a href="mailto:mario@wolczko.com">mario@wolczko.com</a>&gt;, which is available at
|
||
|
<a href="ftp://st.cs.uiuc.edu/pub/Smalltalk/MANCHESTER/manchester/4.0/diff.st">ftp://st.cs.uiuc.edu/pub/Smalltalk/MANCHESTER/manchester/4.0/diff.st</a></p>
|
||
|
<p><code>sdiff</code> and <code>traverse_balanced</code> were written by Mike Schilli
|
||
|
&lt;<a href="mailto:m@perlmeister.com">m@perlmeister.com</a>&gt;.</p>
|
||
|
<p>The algorithm is that described in
|
||
|
<em>A Fast Algorithm for Computing Longest Common Subsequences</em>,
|
||
|
CACM, vol.20, no.5, pp.350-353, May 1977, with a few
|
||
|
minor changes to improve the speed.</p>
|
||
|
|
||
|
</body>
|
||
|
|
||
|
</html>
|