668 lines
		
	
	
		
			22 KiB
		
	
	
	
		
			HTML
		
	
	
	
	
	
			
		
		
	
	
			668 lines
		
	
	
		
			22 KiB
		
	
	
	
		
			HTML
		
	
	
	
	
	
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
 | 
						|
<html xmlns="http://www.w3.org/1999/xhtml">
 | 
						|
<head>
 | 
						|
<title>Algorithm::Diff - Compute `intelligent' differences between two files / lists</title>
 | 
						|
<link rev="made" href="mailto:root@penguin.office.gossamer-threads.com" />
 | 
						|
 | 
						|
<style type="text/css">
 | 
						|
/* $MVD$:fontset("Untitled Font Set 1","ARIEL","HELVETICA","HELV","SANSERIF") */
 | 
						|
/* $MVD$:fontset("Arial","Arial") */
 | 
						|
/* $MVD$:fontset("Arial Black","Arial Black") */
 | 
						|
/* $MVD$:fontset("Algerian","Algerian") */
 | 
						|
 | 
						|
 | 
						|
body {
 | 
						|
    background-color: white;
 | 
						|
    font-family: Verdana, Arial, sans-serif;
 | 
						|
    font-size: small;
 | 
						|
    color: black;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
p {
 | 
						|
    background-color : white;
 | 
						|
    font-family : Verdana, Arial, sans-serif;
 | 
						|
    font-size : small;
 | 
						|
    color : black;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
h1 {
 | 
						|
    font-family : Verdana, Arial, sans-serif;
 | 
						|
    font-weight : bold;
 | 
						|
    font-size : medium;
 | 
						|
    background-color : white;
 | 
						|
    color : maroon;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
h2 {
 | 
						|
    font-family : Verdana, Arial, sans-serif;
 | 
						|
    font-size : medium;
 | 
						|
    font-weight : bold;
 | 
						|
    color : blue;
 | 
						|
    background-color : white;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
h3 {
 | 
						|
    font-family : Verdana, Arial, sans-serif;
 | 
						|
    font-weight : bold;
 | 
						|
    font-size : medium;
 | 
						|
    color : black;
 | 
						|
    background-color : white;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
h4 {
 | 
						|
    font-family : Verdana, Arial, sans-serif;
 | 
						|
    font-weight : bold;
 | 
						|
    font-size : small;
 | 
						|
    color : maroon;
 | 
						|
    background-color : white;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
h5 {
 | 
						|
    font-family : Verdana, Arial, sans-serif;
 | 
						|
    font-weight : bold;
 | 
						|
    font-size : small;
 | 
						|
    color : blue;
 | 
						|
    background-color : white;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
h6 {
 | 
						|
    font-family : Verdana, Arial, sans-serif;
 | 
						|
    font-weight : bold;
 | 
						|
    font-size : small;
 | 
						|
    color : black;
 | 
						|
    background-color : white;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
ul {
 | 
						|
    font-family : Verdana, Arial, sans-serif;
 | 
						|
    font-size : small;
 | 
						|
    color : black;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
ol {
 | 
						|
    font-family : Verdana, Arial, sans-serif;
 | 
						|
    font-size : small;
 | 
						|
    color : black;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
dl {
 | 
						|
    font-family : Verdana, Arial, sans-serif;
 | 
						|
    font-size : small;
 | 
						|
    color : black;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
li {
 | 
						|
    font-family : Verdana, Arial, sans-serif;
 | 
						|
    font-size : small;
 | 
						|
    color : black;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
th {
 | 
						|
    font-family : Verdana, Arial, sans-serif;
 | 
						|
    font-size : small;
 | 
						|
    color : black;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
td {
 | 
						|
    font-family : Verdana, Arial, sans-serif;
 | 
						|
    font-size : small;
 | 
						|
    color : black;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
dl {
 | 
						|
    font-family : Verdana, Arial, sans-serif;
 | 
						|
    font-size : small;
 | 
						|
    color : black;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
dd {
 | 
						|
    font-family : Verdana, Arial, sans-serif;
 | 
						|
    font-size : small;
 | 
						|
    color : black;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
dt {
 | 
						|
    font-family : Verdana, Arial, sans-serif;
 | 
						|
    font-size : small;
 | 
						|
    color : black;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
code {
 | 
						|
    font-family : Courier;
 | 
						|
    font-size : small;
 | 
						|
    color : black;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
pre {
 | 
						|
    font-family : Courier;
 | 
						|
    font-size : small;
 | 
						|
    color : black;
 | 
						|
}
 | 
						|
 | 
						|
.mvd-H1 {
 | 
						|
    font-family : Verdana, Arial, sans-serif;
 | 
						|
    font-weight : bold;
 | 
						|
    font-size : 14.0pt;
 | 
						|
    background-color : transparent;
 | 
						|
    background-image : none;
 | 
						|
    color : maroon;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
.mvd-H2 {
 | 
						|
    font-family : Verdana, Arial, sans-serif;
 | 
						|
    font-size : 12.0pt;
 | 
						|
    color : blue;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
p.indent {
 | 
						|
    font-family : Verdana, Arial, sans-serif;
 | 
						|
    list-style-type : circle;
 | 
						|
    list-style-position : inside;
 | 
						|
    color : black;
 | 
						|
    margin-left : 16.0pt;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
.mvd-P-indent {
 | 
						|
    font-family : Verdana, Arial, sans-serif;
 | 
						|
    list-style-type : circle;
 | 
						|
    list-style-position : inside;
 | 
						|
    color : black;
 | 
						|
    margin-left : 16.0pt;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
pre.programlisting {
 | 
						|
    font-size : 9.0pt;
 | 
						|
    list-style-type : disc;
 | 
						|
    margin-left : 16.0pt;
 | 
						|
    margin-top : -14.0pt;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
.mvd-PRE-programlisting {
 | 
						|
    font-size : 9.0pt;
 | 
						|
    list-style-type : disc;
 | 
						|
    margin-left : 16.0pt;
 | 
						|
    margin-top : -14.0pt;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
.mvd-PRE {
 | 
						|
    font-size : 9.0pt;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
p.note {
 | 
						|
    margin-left : 28.0pt;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
.mvd-P-note {
 | 
						|
    margin-left : 28.0pt;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
.mvd-H4 {
 | 
						|
    font-family : Verdana, Arial, sans-serif;
 | 
						|
    font-weight : normal;
 | 
						|
    font-size : 9.0pt;
 | 
						|
    color : black;
 | 
						|
    margin-left : 6.0pt;
 | 
						|
    margin-top : -14.0pt;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
.mvd-P {
 | 
						|
    font-family : Verdana, Arial, sans-serif;
 | 
						|
    font-size : 10.0pt;
 | 
						|
    color : black;
 | 
						|
}
 | 
						|
 | 
						|
.mvd-BODY {
 | 
						|
    font-family : Verdana, Arial, sans-serif;
 | 
						|
    background-color : white;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
p.indentnobullet {
 | 
						|
    font-family : Verdana, Arial, sans-serif;
 | 
						|
    list-style-type : none;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
.mvd-P-indentnobullet {
 | 
						|
    font-family : Verdana, Arial, sans-serif;
 | 
						|
    list-style-type : none;
 | 
						|
}
 | 
						|
</style>
 | 
						|
 | 
						|
 | 
						|
</head>
 | 
						|
 | 
						|
<body style="background-color: white">
 | 
						|
 | 
						|
<p><a name="__index__"></a></p>
 | 
						|
<!-- INDEX BEGIN -->
 | 
						|
 | 
						|
<ul>
 | 
						|
 | 
						|
	<li><a href="#name">NAME</a></li>
 | 
						|
	<li><a href="#synopsis">SYNOPSIS</a></li>
 | 
						|
	<li><a href="#introduction">INTRODUCTION</a></li>
 | 
						|
	<li><a href="#usage">USAGE</a></li>
 | 
						|
	<ul>
 | 
						|
 | 
						|
		<li><a href="#lcs"><code>LCS</code></a></li>
 | 
						|
		<li><a href="#diff"><code>diff</code></a></li>
 | 
						|
		<li><a href="#sdiff"><code>sdiff</code></a></li>
 | 
						|
		<li><a href="#traverse_sequences"><code>traverse_sequences</code></a></li>
 | 
						|
		<li><a href="#traverse_balanced"><code>traverse_balanced</code></a></li>
 | 
						|
	</ul>
 | 
						|
 | 
						|
	<li><a href="#key_generation_functions">KEY GENERATION FUNCTIONS</a></li>
 | 
						|
	<li><a href="#author">AUTHOR</a></li>
 | 
						|
	<li><a href="#license">LICENSE</a></li>
 | 
						|
	<li><a href="#credits">CREDITS</a></li>
 | 
						|
</ul>
 | 
						|
<!-- INDEX END -->
 | 
						|
 | 
						|
<hr />
 | 
						|
<p>
 | 
						|
</p>
 | 
						|
<h1><a name="name">NAME</a></h1>
 | 
						|
<p>Algorithm::Diff - Compute `intelligent' differences between two files / lists</p>
 | 
						|
<p>
 | 
						|
</p>
 | 
						|
<hr />
 | 
						|
<h1><a name="synopsis">SYNOPSIS</a></h1>
 | 
						|
<pre>
 | 
						|
  use GT::File::Diff qw(diff sdiff LCS traverse_sequences
 | 
						|
                         traverse_balanced);</pre>
 | 
						|
<pre>
 | 
						|
  @lcs    = LCS( \@seq1, \@seq2 );</pre>
 | 
						|
<pre>
 | 
						|
  @lcs    = LCS( \@seq1, \@seq2, $key_generation_function );</pre>
 | 
						|
<pre>
 | 
						|
  $lcsref = LCS( \@seq1, \@seq2 );</pre>
 | 
						|
<pre>
 | 
						|
  $lcsref = LCS( \@seq1, \@seq2, $key_generation_function );</pre>
 | 
						|
<pre>
 | 
						|
  @diffs = diff( \@seq1, \@seq2 );</pre>
 | 
						|
<pre>
 | 
						|
  @diffs = diff( \@seq1, \@seq2, $key_generation_function );</pre>
 | 
						|
<pre>
 | 
						|
  @sdiffs = sdiff( \@seq1, \@seq2 );</pre>
 | 
						|
<pre>
 | 
						|
  @sdiffs = sdiff( \@seq1, \@seq2, $key_generation_function );
 | 
						|
  
 | 
						|
  traverse_sequences( \@seq1, \@seq2,
 | 
						|
                     { MATCH => $callback,
 | 
						|
                       DISCARD_A => $callback,
 | 
						|
                       DISCARD_B => $callback,
 | 
						|
                     } );</pre>
 | 
						|
<pre>
 | 
						|
  traverse_sequences( \@seq1, \@seq2,
 | 
						|
                     { MATCH => $callback,
 | 
						|
                       DISCARD_A => $callback,
 | 
						|
                       DISCARD_B => $callback,
 | 
						|
                     },
 | 
						|
                     $key_generation_function );</pre>
 | 
						|
<pre>
 | 
						|
  traverse_balanced( \@seq1, \@seq2,
 | 
						|
                     { MATCH => $callback,
 | 
						|
                       DISCARD_A => $callback,
 | 
						|
                       DISCARD_B => $callback,
 | 
						|
                       CHANGE    => $callback,
 | 
						|
                     } );</pre>
 | 
						|
<p>
 | 
						|
</p>
 | 
						|
<hr />
 | 
						|
<h1><a name="introduction">INTRODUCTION</a></h1>
 | 
						|
<p>(by Mark-Jason Dominus)</p>
 | 
						|
<p>I once read an article written by the authors of <code>diff</code>; they said
 | 
						|
that they worked very hard on the algorithm until they found the
 | 
						|
right one.</p>
 | 
						|
<p>I think what they ended up using (and I hope someone will correct me,
 | 
						|
because I am not very confident about this) was the `longest common
 | 
						|
subsequence' method.  In the LCS problem, you have two sequences of
 | 
						|
items:</p>
 | 
						|
<pre>
 | 
						|
        a b c d f g h j q z</pre>
 | 
						|
<pre>
 | 
						|
        a b c d e f g i j k r x y z</pre>
 | 
						|
<p>and you want to find the longest sequence of items that is present in
 | 
						|
both original sequences in the same order.  That is, you want to find
 | 
						|
a new sequence <em>S</em> which can be obtained from the first sequence by
 | 
						|
deleting some items, and from the second sequence by deleting other
 | 
						|
items.  You also want <em>S</em> to be as long as possible.  In this case
 | 
						|
<em>S</em> is</p>
 | 
						|
<pre>
 | 
						|
        a b c d f g j z</pre>
 | 
						|
<p>From there it's only a small step to get diff-like output:</p>
 | 
						|
<pre>
 | 
						|
        e   h i   k   q r x y 
 | 
						|
        +   - +   +   - + + +</pre>
 | 
						|
<p>This module solves the LCS problem.  It also includes a canned
 | 
						|
function to generate <code>diff</code>-like output.</p>
 | 
						|
<p>It might seem from the example above that the LCS of two sequences is
 | 
						|
always pretty obvious, but that's not always the case, especially when
 | 
						|
the two sequences have many repeated elements.  For example, consider</p>
 | 
						|
<pre>
 | 
						|
        a x b y c z p d q
 | 
						|
        a b c a x b y c z</pre>
 | 
						|
<p>A naive approach might start by matching up the <code>a</code> and <code>b</code> that
 | 
						|
appear at the beginning of each sequence, like this:</p>
 | 
						|
<pre>
 | 
						|
        a x b y c         z p d q
 | 
						|
        a   b   c a b y c z</pre>
 | 
						|
<p>This finds the common subsequence <code>a b c z</code>.  But actually, the LCS
 | 
						|
is <code>a x b y c z</code>:</p>
 | 
						|
<pre>
 | 
						|
              a x b y c z p d q
 | 
						|
        a b c a x b y c z</pre>
 | 
						|
<p>
 | 
						|
</p>
 | 
						|
<hr />
 | 
						|
<h1><a name="usage">USAGE</a></h1>
 | 
						|
<p>This module provides five exportable functions, which we'll deal with in
 | 
						|
ascending order of difficulty: <code>LCS</code>, 
 | 
						|
<code>diff</code>, <code>sdiff</code>, <code>traverse_sequences</code>, and <code>traverse_balanced</code>.</p>
 | 
						|
<p>
 | 
						|
</p>
 | 
						|
<h2><a name="lcs"><code>LCS</code></a></h2>
 | 
						|
<p>Given references to two lists of items, LCS returns an array containing their
 | 
						|
longest common subsequence.  In scalar context, it returns a reference to
 | 
						|
such a list.</p>
 | 
						|
<pre>
 | 
						|
  @lcs    = LCS( \@seq1, \@seq2 );
 | 
						|
  $lcsref = LCS( \@seq1, \@seq2 );</pre>
 | 
						|
<p><code>LCS</code> may be passed an optional third parameter; this is a CODE
 | 
						|
reference to a key generation function.  See <a href="#key_generation_functions">KEY GENERATION FUNCTIONS</a>.</p>
 | 
						|
<pre>
 | 
						|
  @lcs    = LCS( \@seq1, \@seq2, $keyGen );
 | 
						|
  $lcsref = LCS( \@seq1, \@seq2, $keyGen );</pre>
 | 
						|
<p>Additional parameters, if any, will be passed to the key generation
 | 
						|
routine.</p>
 | 
						|
<p>
 | 
						|
</p>
 | 
						|
<h2><a name="diff"><code>diff</code></a></h2>
 | 
						|
<pre>
 | 
						|
  @diffs     = diff( \@seq1, \@seq2 );
 | 
						|
  $diffs_ref = diff( \@seq1, \@seq2 );</pre>
 | 
						|
<p><code>diff</code> computes the smallest set of additions and deletions necessary
 | 
						|
to turn the first sequence into the second, and returns a description
 | 
						|
of these changes.  The description is a list of <em>hunks</em>; each hunk
 | 
						|
represents a contiguous section of items which should be added,
 | 
						|
deleted, or replaced.  The return value of <code>diff</code> is a list of
 | 
						|
hunks, or, in scalar context, a reference to such a list.</p>
 | 
						|
<p>Here is an example:  The diff of the following two sequences:</p>
 | 
						|
<pre>
 | 
						|
  a b c e h j l m n p
 | 
						|
  b c d e f j k l m r s t</pre>
 | 
						|
<p>Result:</p>
 | 
						|
<pre>
 | 
						|
 [ 
 | 
						|
   [ [ '-', 0, 'a' ] ],</pre>
 | 
						|
<pre>
 | 
						|
   [ [ '+', 2, 'd' ] ],</pre>
 | 
						|
<pre>
 | 
						|
   [ [ '-', 4, 'h' ] , 
 | 
						|
     [ '+', 4, 'f' ] ],</pre>
 | 
						|
<pre>
 | 
						|
   [ [ '+', 6, 'k' ] ],</pre>
 | 
						|
<pre>
 | 
						|
   [ [ '-', 8, 'n' ], 
 | 
						|
     [ '-', 9, 'p' ], 
 | 
						|
     [ '+', 9, 'r' ], 
 | 
						|
     [ '+', 10, 's' ], 
 | 
						|
     [ '+', 11, 't' ],
 | 
						|
   ]
 | 
						|
 ]</pre>
 | 
						|
<p>There are five hunks here.  The first hunk says that the <code>a</code> at
 | 
						|
position 0 of the first sequence should be deleted (<code>-</code>).  The second
 | 
						|
hunk says that the <code>d</code> at position 2 of the second sequence should
 | 
						|
be inserted (<code>+</code>).  The third hunk says that the <code>h</code> at position 4
 | 
						|
of the first sequence should be removed and replaced with the <code>f</code>
 | 
						|
from position 4 of the second sequence.  The other two hunks similarly.</p>
 | 
						|
<p><code>diff</code> may be passed an optional third parameter; this is a CODE
 | 
						|
reference to a key generation function.  See <a href="#key_generation_functions">KEY GENERATION FUNCTIONS</a>.</p>
 | 
						|
<p>Additional parameters, if any, will be passed to the key generation
 | 
						|
routine.</p>
 | 
						|
<p>
 | 
						|
</p>
 | 
						|
<h2><a name="sdiff"><code>sdiff</code></a></h2>
 | 
						|
<pre>
 | 
						|
  @sdiffs     = sdiff( \@seq1, \@seq2 );
 | 
						|
  $sdiffs_ref = sdiff( \@seq1, \@seq2 );</pre>
 | 
						|
<p><code>sdiff</code> computes all necessary components to show two sequences
 | 
						|
and their minimized differences side by side, just like the 
 | 
						|
Unix-utility <em>sdiff</em> does:</p>
 | 
						|
<pre>
 | 
						|
    same             same
 | 
						|
    before     |     after
 | 
						|
    old        &lt;     -
 | 
						|
    -          &gt;     new</pre>
 | 
						|
<p>It returns a list of array refs, each pointing to an array of 
 | 
						|
display instructions. In scalar context it returns a reference
 | 
						|
to such a list.</p>
 | 
						|
<p>Display instructions consist of three elements: A modifier indicator
 | 
						|
(<code>+</code>: Element added, <code>-</code>: Element removed, <code>u</code>: Element unmodified, 
 | 
						|
<code>c</code>: Element changed) and the value of the old and new elements, to
 | 
						|
be displayed side by side.</p>
 | 
						|
<p>An <code>sdiff</code> of the following two sequences:</p>
 | 
						|
<pre>
 | 
						|
  a b c e h j l m n p
 | 
						|
  b c d e f j k l m r s t</pre>
 | 
						|
<p>results in</p>
 | 
						|
<p>[ [ '-', 'a', ''  ],
 | 
						|
  [ 'u', 'b', 'b' ],
 | 
						|
  [ 'u', 'c', 'c' ],
 | 
						|
  [ '+', '',  'd' ],
 | 
						|
  [ 'u', 'e', 'e' ],
 | 
						|
  [ 'c', 'h', 'f' ],
 | 
						|
  [ 'u', 'j', 'j' ],
 | 
						|
  [ '+', '',  'k' ],
 | 
						|
  [ 'u', 'l', 'l' ],
 | 
						|
  [ 'u', 'm', 'm' ],
 | 
						|
  [ 'c', 'n', 'r' ],
 | 
						|
  [ 'c', 'p', 's' ],
 | 
						|
  [ '+', '', 't' ] ]</p>
 | 
						|
<p><code>sdiff</code> may be passed an optional third parameter; this is a CODE
 | 
						|
reference to a key generation function.  See <a href="#key_generation_functions">KEY GENERATION FUNCTIONS</a>.</p>
 | 
						|
<p>Additional parameters, if any, will be passed to the key generation
 | 
						|
routine.</p>
 | 
						|
<p>
 | 
						|
</p>
 | 
						|
<h2><a name="traverse_sequences"><code>traverse_sequences</code></a></h2>
 | 
						|
<p><code>traverse_sequences</code> is the most general facility provided by this
 | 
						|
module; <code>diff</code> and <code>LCS</code> are implemented as calls to it.</p>
 | 
						|
<p>Imagine that there are two arrows.  Arrow A points to an element of sequence A,
 | 
						|
and arrow B points to an element of the sequence B.  Initially, the arrows
 | 
						|
point to the first elements of the respective sequences.  <code>traverse_sequences</code>
 | 
						|
will advance the arrows through the sequences one element at a time, calling an
 | 
						|
appropriate user-specified callback function before each advance.  It
 | 
						|
will advance the arrows in such a way that if there are elements <code>$A[$i]</code>
 | 
						|
and <code>$B[$j]</code> which are equal and which are part of the LCS, there will be
 | 
						|
some moment during the execution of <code>traverse_sequences</code> when arrow A is
 | 
						|
pointing to <code>$A[$i]</code> and arrow B is pointing to <code>$B[$j]</code>.  When this happens,
 | 
						|
<code>traverse_sequences</code> will call the <code>MATCH</code> callback function and then it will
 | 
						|
advance both arrows.</p>
 | 
						|
<p>Otherwise, one of the arrows is pointing to an element of its sequence that is
 | 
						|
not part of the LCS.  <code>traverse_sequences</code> will advance that arrow and will
 | 
						|
call the <code>DISCARD_A</code> or the <code>DISCARD_B</code> callback, depending on which arrow it
 | 
						|
advanced.  If both arrows point to elements that are not part of the LCS, then
 | 
						|
<code>traverse_sequences</code> will advance one of them and call the appropriate
 | 
						|
callback, but it is not specified which it will call.</p>
 | 
						|
<p>The arguments to <code>traverse_sequences</code> are the two sequences to traverse, and a
 | 
						|
hash which specifies the callback functions, like this:</p>
 | 
						|
<pre>
 | 
						|
  traverse_sequences( \@seq1, \@seq2,
 | 
						|
                     { MATCH => $callback_1,
 | 
						|
                       DISCARD_A => $callback_2,
 | 
						|
                       DISCARD_B => $callback_3,
 | 
						|
                     } );</pre>
 | 
						|
<p>Callbacks for MATCH, DISCARD_A, and DISCARD_B are invoked with at least the
 | 
						|
indices of the two arrows as their arguments.  They are not expected to return
 | 
						|
any values.  If a callback is omitted from the table, it is not called.</p>
 | 
						|
<p>Callbacks for A_FINISHED and B_FINISHED are invoked with at least the
 | 
						|
corresponding index in A or B.</p>
 | 
						|
<p>If arrow A reaches the end of its sequence, before arrow B does,
 | 
						|
<code>traverse_sequences</code> will call the <code>A_FINISHED</code> callback when it advances
 | 
						|
arrow B, if there is such a function; if not it will call <code>DISCARD_B</code> instead.
 | 
						|
Similarly if arrow B finishes first.  <code>traverse_sequences</code> returns when both
 | 
						|
arrows are at the ends of their respective sequences.  It returns true on
 | 
						|
success and false on failure.  At present there is no way to fail.</p>
 | 
						|
<p><code>traverse_sequences</code> may be passed an optional fourth parameter; this is a
 | 
						|
CODE reference to a key generation function.  See <a href="#key_generation_functions">KEY GENERATION FUNCTIONS</a>.</p>
 | 
						|
<p>Additional parameters, if any, will be passed to the key generation function.</p>
 | 
						|
<p>
 | 
						|
</p>
 | 
						|
<h2><a name="traverse_balanced"><code>traverse_balanced</code></a></h2>
 | 
						|
<p><code>traverse_balanced</code> is an alternative to <code>traverse_sequences</code>. It
 | 
						|
uses a different algorithm to iterate through the entries in the
 | 
						|
computed LCS. Instead of sticking to one side and showing element changes
 | 
						|
as insertions and deletions only, it will jump back and forth between
 | 
						|
the two sequences and report <em>changes</em> occurring as deletions on one
 | 
						|
side followed immediately by an insertion on the other side.</p>
 | 
						|
<p>In addition to the 
 | 
						|
<code>DISCARD_A</code>,
 | 
						|
<code>DISCARD_B</code>, and
 | 
						|
<code>MATCH</code>
 | 
						|
callbacks supported by <code>traverse_sequences</code>, <code>traverse_balanced</code> supports
 | 
						|
a <code>CHANGE</code> callback indicating that one element got <code>replaced</code> by another:</p>
 | 
						|
<pre>
 | 
						|
  traverse_balanced( \@seq1, \@seq2,
 | 
						|
                     { MATCH => $callback_1,
 | 
						|
                       DISCARD_A => $callback_2,
 | 
						|
                       DISCARD_B => $callback_3,
 | 
						|
                       CHANGE    => $callback_4,
 | 
						|
                     } );</pre>
 | 
						|
<p>If no <code>CHANGE</code> callback is specified, <code>traverse_balanced</code>
 | 
						|
will map <code>CHANGE</code> events to <code>DISCARD_A</code> and <code>DISCARD_B</code> actions,
 | 
						|
therefore resulting in a similar behaviour as <code>traverse_sequences</code>
 | 
						|
with different order of events.</p>
 | 
						|
<p><code>traverse_balanced</code> might be a bit slower than <code>traverse_sequences</code>,
 | 
						|
noticeable only while processing huge amounts of data.</p>
 | 
						|
<p>The <code>sdiff</code> function of this module 
 | 
						|
is implemented as a call to <code>traverse_balanced</code>.</p>
 | 
						|
<p>
 | 
						|
</p>
 | 
						|
<hr />
 | 
						|
<h1><a name="key_generation_functions">KEY GENERATION FUNCTIONS</a></h1>
 | 
						|
<p><code>diff</code>, <code>LCS</code>, and <code>traverse_sequences</code> accept an optional last parameter.
 | 
						|
This is a CODE reference to a key generating (hashing) function that should
 | 
						|
return a string that uniquely identifies a given element.  It should be the
 | 
						|
case that if two elements are to be considered equal, their keys should be the
 | 
						|
same (and the other way around).  If no key generation function is provided,
 | 
						|
the key will be the element as a string.</p>
 | 
						|
<p>By default, comparisons will use ``eq'' and elements will be turned into keys
 | 
						|
using the default stringizing operator '&quot;&quot;'.</p>
 | 
						|
<p>Where this is important is when you're comparing something other than strings.
 | 
						|
If it is the case that you have multiple different objects that should be
 | 
						|
considered to be equal, you should supply a key generation function. Otherwise,
 | 
						|
you have to make sure that your arrays contain unique references.</p>
 | 
						|
<p>For instance, consider this example:</p>
 | 
						|
<pre>
 | 
						|
  package Person;</pre>
 | 
						|
<pre>
 | 
						|
  sub new
 | 
						|
  {
 | 
						|
    my $package = shift;
 | 
						|
    return bless { name => '', ssn => '', @_ }, $package;
 | 
						|
  }</pre>
 | 
						|
<pre>
 | 
						|
  sub clone
 | 
						|
  {
 | 
						|
    my $old = shift;
 | 
						|
    my $new = bless { %$old }, ref($old);
 | 
						|
  }</pre>
 | 
						|
<pre>
 | 
						|
  sub hash
 | 
						|
  {
 | 
						|
    return shift()->{'ssn'};
 | 
						|
  }</pre>
 | 
						|
<pre>
 | 
						|
  my $person1 = Person->new( name => 'Joe', ssn => '123-45-6789' );
 | 
						|
  my $person2 = Person->new( name => 'Mary', ssn => '123-47-0000' );
 | 
						|
  my $person3 = Person->new( name => 'Pete', ssn => '999-45-2222' );
 | 
						|
  my $person4 = Person->new( name => 'Peggy', ssn => '123-45-9999' );
 | 
						|
  my $person5 = Person->new( name => 'Frank', ssn => '000-45-9999' );</pre>
 | 
						|
<p>If you did this:</p>
 | 
						|
<pre>
 | 
						|
  my $array1 = [ $person1, $person2, $person4 ];
 | 
						|
  my $array2 = [ $person1, $person3, $person4, $person5 ];
 | 
						|
  GT::File::Diff::diff( $array1, $array2 );</pre>
 | 
						|
<p>everything would work out OK (each of the objects would be converted
 | 
						|
into a string like ``Person=HASH(0x82425b0)'' for comparison).</p>
 | 
						|
<p>But if you did this:</p>
 | 
						|
<pre>
 | 
						|
  my $array1 = [ $person1, $person2, $person4 ];
 | 
						|
  my $array2 = [ $person1, $person3, $person4->clone(), $person5 ];
 | 
						|
  GT::File::Diff::diff( $array1, $array2 );</pre>
 | 
						|
<p><code>$person4</code> and <code>$person4-&gt;clone()</code> (which have the same name and SSN)
 | 
						|
would be seen as different objects. If you wanted them to be considered
 | 
						|
equivalent, you would have to pass in a key generation function:</p>
 | 
						|
<pre>
 | 
						|
  my $array1 = [ $person1, $person2, $person4 ];
 | 
						|
  my $array2 = [ $person1, $person3, $person4->clone(), $person5 ];
 | 
						|
  GT::File::Diff::diff( $array1, $array2, \&amp;Person::hash );</pre>
 | 
						|
<p>This would use the 'ssn' field in each Person as a comparison key, and
 | 
						|
so would consider <code>$person4</code> and <code>$person4-&gt;clone()</code> as equal.</p>
 | 
						|
<p>You may also pass additional parameters to the key generation function
 | 
						|
if you wish.</p>
 | 
						|
<p>
 | 
						|
</p>
 | 
						|
<hr />
 | 
						|
<h1><a name="author">AUTHOR</a></h1>
 | 
						|
<p>This version by Ned Konz, <a href="mailto:perl@bike-nomad.com">perl@bike-nomad.com</a></p>
 | 
						|
<p>
 | 
						|
</p>
 | 
						|
<hr />
 | 
						|
<h1><a name="license">LICENSE</a></h1>
 | 
						|
<p>Copyright (c) 2000-2002 Ned Konz.  All rights reserved.
 | 
						|
This program is free software;
 | 
						|
you can redistribute it and/or modify it under the same terms
 | 
						|
as Perl itself.</p>
 | 
						|
<p>
 | 
						|
</p>
 | 
						|
<hr />
 | 
						|
<h1><a name="credits">CREDITS</a></h1>
 | 
						|
<p>Versions through 0.59 (and much of this documentation) were written by:</p>
 | 
						|
<p>Mark-Jason Dominus, <a href="mailto:mjd-perl-diff@plover.com">mjd-perl-diff@plover.com</a></p>
 | 
						|
<p>This version borrows the documentation and names of the routines
 | 
						|
from Mark-Jason's, but has all new code in Diff.pm.</p>
 | 
						|
<p>This code was adapted from the Smalltalk code of
 | 
						|
Mario Wolczko &lt;<a href="mailto:mario@wolczko.com">mario@wolczko.com</a>&gt;, which is available at
 | 
						|
<a href="ftp://st.cs.uiuc.edu/pub/Smalltalk/MANCHESTER/manchester/4.0/diff.st">ftp://st.cs.uiuc.edu/pub/Smalltalk/MANCHESTER/manchester/4.0/diff.st</a></p>
 | 
						|
<p><code>sdiff</code> and <code>traverse_balanced</code> were written by Mike Schilli
 | 
						|
&lt;<a href="mailto:m@perlmeister.com">m@perlmeister.com</a>&gt;.</p>
 | 
						|
<p>The algorithm is that described in 
 | 
						|
<em>A Fast Algorithm for Computing Longest Common Subsequences</em>,
 | 
						|
CACM, vol.20, no.5, pp.350-353, May 1977, with a few
 | 
						|
minor improvements to improve the speed.</p>
 | 
						|
 | 
						|
</body>
 | 
						|
 | 
						|
</html>
 |