File: | local/lib/perl5/Algorithm/Diff.pm |
Coverage: | 4.3% |
line | stmt | bran | cond | sub | time | code |
---|---|---|---|---|---|---|
1 | package Algorithm::Diff; | |||||
2 | # Skip to first "=head" line for documentation. | |||||
3 | 6 6 6 | 12 4 80 | use strict; | |||
4 | ||||||
5 | 6 6 6 | 850 30 12 | use integer; # see below in _replaceNextLargerWith() for mod to make | |||
6 | # if you don't use this | |||||
7 | 6 6 6 | 82 4 5791 | use vars qw( $VERSION @EXPORT_OK ); | |||
8 | $VERSION = 1.19_02; | |||||
9 | # ^ ^^ ^^-- Incremented at will | |||||
10 | # | \+----- Incremented for non-trivial changes to features | |||||
11 | # \-------- Incremented for fundamental changes | |||||
12 | require Exporter; | |||||
13 | *import = \&Exporter::import; | |||||
14 | @EXPORT_OK = qw( | |||||
15 | prepare LCS LCSidx LCS_length | |||||
16 | diff sdiff compact_diff | |||||
17 | traverse_sequences traverse_balanced | |||||
18 | ); | |||||
19 | ||||||
20 | # McIlroy-Hunt diff algorithm | |||||
21 | # Adapted from the Smalltalk code of Mario I. Wolczko, <mario@wolczko.com> | |||||
22 | # by Ned Konz, perl@bike-nomad.com | |||||
23 | # Updates by Tye McQueen, http://perlmonks.org/?node=tye | |||||
24 | ||||||
25 | # Create a hash that maps each element of $aCollection to the set of | |||||
26 | # positions it occupies in $aCollection, restricted to the elements | |||||
27 | # within the range of indexes specified by $start and $end. | |||||
28 | # The fourth parameter is a subroutine reference that will be called to | |||||
29 | # generate a string to use as a key. | |||||
30 | # Additional parameters, if any, will be passed to this subroutine. | |||||
31 | # | |||||
32 | # my $hashRef = _withPositionsOfInInterval( \@array, $start, $end, $keyGen ); | |||||
33 | ||||||
34 | sub _withPositionsOfInInterval | |||||
35 | { | |||||
36 | 0 | 0 | my $aCollection = shift; # array ref | |||
37 | 0 | 0 | my $start = shift; | |||
38 | 0 | 0 | my $end = shift; | |||
39 | 0 | 0 | my $keyGen = shift; | |||
40 | 0 | 0 | my %d; | |||
41 | my $index; | |||||
42 | 0 | 0 | for ( $index = $start ; $index <= $end ; $index++ ) | |||
43 | { | |||||
44 | 0 | 0 | my $element = $aCollection->[$index]; | |||
45 | 0 | 0 | my $key = &$keyGen( $element, @_ ); | |||
46 | 0 | 0 | if ( exists( $d{$key} ) ) | |||
47 | { | |||||
48 | 0 0 | 0 0 | unshift ( @{ $d{$key} }, $index ); | |||
49 | } | |||||
50 | else | |||||
51 | { | |||||
52 | 0 | 0 | $d{$key} = [$index]; | |||
53 | } | |||||
54 | } | |||||
55 | 0 | 0 | return wantarray ? %d : \%d; | |||
56 | } | |||||
57 | ||||||
58 | # Find the place at which aValue would normally be inserted into the | |||||
59 | # array. If that place is already occupied by aValue, do nothing, and | |||||
60 | # return undef. If the place does not exist (i.e., it is off the end of | |||||
61 | # the array), add it to the end, otherwise replace the element at that | |||||
62 | # point with aValue. It is assumed that the array's values are numeric. | |||||
63 | # This is where the bulk (75%) of the time is spent in this module, so | |||||
64 | # try to make it fast! | |||||
65 | ||||||
66 | sub _replaceNextLargerWith | |||||
67 | { | |||||
68 | 0 | 0 | my ( $array, $aValue, $high ) = @_; | |||
69 | 0 | 0 | $high ||= $#$array; | |||
70 | ||||||
71 | # off the end? | |||||
72 | 0 | 0 | if ( $high == -1 || $aValue > $array->[-1] ) | |||
73 | { | |||||
74 | 0 | 0 | push ( @$array, $aValue ); | |||
75 | 0 | 0 | return $high + 1; | |||
76 | } | |||||
77 | ||||||
78 | # binary search for insertion point... | |||||
79 | 0 | 0 | my $low = 0; | |||
80 | 0 | 0 | my $index; | |||
81 | my $found; | |||||
82 | 0 | 0 | while ( $low <= $high ) | |||
83 | { | |||||
84 | 0 | 0 | $index = ( $high + $low ) / 2; | |||
85 | ||||||
86 | # $index = int(( $high + $low ) / 2); # without 'use integer' | |||||
87 | 0 | 0 | $found = $array->[$index]; | |||
88 | ||||||
89 | 0 | 0 | if ( $aValue == $found ) | |||
90 | { | |||||
91 | 0 | 0 | return undef; | |||
92 | } | |||||
93 | elsif ( $aValue > $found ) | |||||
94 | { | |||||
95 | 0 | 0 | $low = $index + 1; | |||
96 | } | |||||
97 | else | |||||
98 | { | |||||
99 | 0 | 0 | $high = $index - 1; | |||
100 | } | |||||
101 | } | |||||
102 | ||||||
103 | # now insertion point is in $low. | |||||
104 | 0 | 0 | $array->[$low] = $aValue; # overwrite next larger | |||
105 | 0 | 0 | return $low; | |||
106 | } | |||||
107 | ||||||
108 | # This method computes the longest common subsequence in $a and $b. | |||||
109 | ||||||
110 | # Result is array or ref, whose contents is such that | |||||
111 | # $a->[ $i ] == $b->[ $result[ $i ] ] | |||||
112 | # foreach $i in ( 0 .. $#result ) if $result[ $i ] is defined. | |||||
113 | ||||||
114 | # An additional argument may be passed; this is a hash or key generating | |||||
115 | # function that should return a string that uniquely identifies the given | |||||
116 | # element. It should be the case that if the key is the same, the elements | |||||
117 | # will compare the same. If this parameter is undef or missing, the key | |||||
118 | # will be the element as a string. | |||||
119 | ||||||
120 | # By default, comparisons will use "eq" and elements will be turned into keys | |||||
121 | # using the default stringizing operator '""'. | |||||
122 | ||||||
123 | # Additional parameters, if any, will be passed to the key generation | |||||
124 | # routine. | |||||
125 | ||||||
126 | sub _longestCommonSubsequence | |||||
127 | { | |||||
128 | 0 | 0 | my $a = shift; # array ref or hash ref | |||
129 | 0 | 0 | my $b = shift; # array ref or hash ref | |||
130 | 0 | 0 | my $counting = shift; # scalar | |||
131 | 0 | 0 | my $keyGen = shift; # code ref | |||
132 | 0 | 0 | my $compare; # code ref | |||
133 | ||||||
134 | 0 | 0 | if ( ref($a) eq 'HASH' ) | |||
135 | { # prepared hash must be in $b | |||||
136 | 0 | 0 | my $tmp = $b; | |||
137 | 0 | 0 | $b = $a; | |||
138 | 0 | 0 | $a = $tmp; | |||
139 | } | |||||
140 | ||||||
141 | # Check for bogus (non-ref) argument values | |||||
142 | 0 | 0 | if ( !ref($a) || !ref($b) ) | |||
143 | { | |||||
144 | 0 | 0 | my @callerInfo = caller(1); | |||
145 | 0 | 0 | die 'error: must pass array or hash references to ' . $callerInfo[3]; | |||
146 | } | |||||
147 | ||||||
148 | # set up code refs | |||||
149 | # Note that these are optimized. | |||||
150 | 0 | 0 | if ( !defined($keyGen) ) # optimize for strings | |||
151 | { | |||||
152 | 0 0 | 0 0 | $keyGen = sub { $_[0] }; | |||
153 | 0 0 0 | 0 0 0 | $compare = sub { my ( $a, $b ) = @_; $a eq $b }; | |||
154 | } | |||||
155 | else | |||||
156 | { | |||||
157 | $compare = sub { | |||||
158 | 0 | 0 | my $a = shift; | |||
159 | 0 | 0 | my $b = shift; | |||
160 | 0 | 0 | &$keyGen( $a, @_ ) eq &$keyGen( $b, @_ ); | |||
161 | 0 | 0 | }; | |||
162 | } | |||||
163 | ||||||
164 | 0 | 0 | my ( $aStart, $aFinish, $matchVector ) = ( 0, $#$a, [] ); | |||
165 | 0 | 0 | my ( $prunedCount, $bMatches ) = ( 0, {} ); | |||
166 | ||||||
167 | 0 | 0 | if ( ref($b) eq 'HASH' ) # was $bMatches prepared for us? | |||
168 | { | |||||
169 | 0 | 0 | $bMatches = $b; | |||
170 | } | |||||
171 | else | |||||
172 | { | |||||
173 | 0 | 0 | my ( $bStart, $bFinish ) = ( 0, $#$b ); | |||
174 | ||||||
175 | # First we prune off any common elements at the beginning | |||||
176 | 0 | 0 | while ( $aStart <= $aFinish | |||
177 | and $bStart <= $bFinish | |||||
178 | and &$compare( $a->[$aStart], $b->[$bStart], @_ ) ) | |||||
179 | { | |||||
180 | 0 | 0 | $matchVector->[ $aStart++ ] = $bStart++; | |||
181 | 0 | 0 | $prunedCount++; | |||
182 | } | |||||
183 | ||||||
184 | # now the end | |||||
185 | 0 | 0 | while ( $aStart <= $aFinish | |||
186 | and $bStart <= $bFinish | |||||
187 | and &$compare( $a->[$aFinish], $b->[$bFinish], @_ ) ) | |||||
188 | { | |||||
189 | 0 | 0 | $matchVector->[ $aFinish-- ] = $bFinish--; | |||
190 | 0 | 0 | $prunedCount++; | |||
191 | } | |||||
192 | ||||||
193 | # Now compute the equivalence classes of positions of elements | |||||
194 | $bMatches = | |||||
195 | 0 | 0 | _withPositionsOfInInterval( $b, $bStart, $bFinish, $keyGen, @_ ); | |||
196 | } | |||||
197 | 0 | 0 | my $thresh = []; | |||
198 | 0 | 0 | my $links = []; | |||
199 | ||||||
200 | 0 | 0 | my ( $i, $ai, $j, $k ); | |||
201 | 0 | 0 | for ( $i = $aStart ; $i <= $aFinish ; $i++ ) | |||
202 | { | |||||
203 | 0 | 0 | $ai = &$keyGen( $a->[$i], @_ ); | |||
204 | 0 | 0 | if ( exists( $bMatches->{$ai} ) ) | |||
205 | { | |||||
206 | 0 | 0 | $k = 0; | |||
207 | 0 0 | 0 0 | for $j ( @{ $bMatches->{$ai} } ) | |||
208 | { | |||||
209 | ||||||
210 | # optimization: most of the time this will be true | |||||
211 | 0 | 0 | if ( $k and $thresh->[$k] > $j and $thresh->[ $k - 1 ] < $j ) | |||
212 | { | |||||
213 | 0 | 0 | $thresh->[$k] = $j; | |||
214 | } | |||||
215 | else | |||||
216 | { | |||||
217 | 0 | 0 | $k = _replaceNextLargerWith( $thresh, $j, $k ); | |||
218 | } | |||||
219 | ||||||
220 | # oddly, it's faster to always test this (CPU cache?). | |||||
221 | 0 | 0 | if ( defined($k) ) | |||
222 | { | |||||
223 | 0 | 0 | $links->[$k] = | |||
224 | [ ( $k ? $links->[ $k - 1 ] : undef ), $i, $j ]; | |||||
225 | } | |||||
226 | } | |||||
227 | } | |||||
228 | } | |||||
229 | ||||||
230 | 0 | 0 | if (@$thresh) | |||
231 | { | |||||
232 | 0 | 0 | return $prunedCount + @$thresh if $counting; | |||
233 | 0 | 0 | for ( my $link = $links->[$#$thresh] ; $link ; $link = $link->[0] ) | |||
234 | { | |||||
235 | 0 | 0 | $matchVector->[ $link->[1] ] = $link->[2]; | |||
236 | } | |||||
237 | } | |||||
238 | elsif ($counting) | |||||
239 | { | |||||
240 | 0 | 0 | return $prunedCount; | |||
241 | } | |||||
242 | ||||||
243 | 0 | 0 | return wantarray ? @$matchVector : $matchVector; | |||
244 | } | |||||
245 | ||||||
246 | sub traverse_sequences | |||||
247 | { | |||||
248 | 0 | 0 | my $a = shift; # array ref | |||
249 | 0 | 0 | my $b = shift; # array ref | |||
250 | 0 | 0 | my $callbacks = shift || {}; | |||
251 | 0 | 0 | my $keyGen = shift; | |||
252 | 0 | 0 | my $matchCallback = $callbacks->{'MATCH'} || sub { }; | |||
253 | 0 | 0 | my $discardACallback = $callbacks->{'DISCARD_A'} || sub { }; | |||
254 | 0 | 0 | my $finishedACallback = $callbacks->{'A_FINISHED'}; | |||
255 | 0 | 0 | my $discardBCallback = $callbacks->{'DISCARD_B'} || sub { }; | |||
256 | 0 | 0 | my $finishedBCallback = $callbacks->{'B_FINISHED'}; | |||
257 | 0 | 0 | my $matchVector = _longestCommonSubsequence( $a, $b, 0, $keyGen, @_ ); | |||
258 | ||||||
259 | # Process all the lines in @$matchVector | |||||
260 | 0 | 0 | my $lastA = $#$a; | |||
261 | 0 | 0 | my $lastB = $#$b; | |||
262 | 0 | 0 | my $bi = 0; | |||
263 | 0 | 0 | my $ai; | |||
264 | ||||||
265 | 0 | 0 | for ( $ai = 0 ; $ai <= $#$matchVector ; $ai++ ) | |||
266 | { | |||||
267 | 0 | 0 | my $bLine = $matchVector->[$ai]; | |||
268 | 0 | 0 | if ( defined($bLine) ) # matched | |||
269 | { | |||||
270 | 0 | 0 | &$discardBCallback( $ai, $bi++, @_ ) while $bi < $bLine; | |||
271 | 0 | 0 | &$matchCallback( $ai, $bi++, @_ ); | |||
272 | } | |||||
273 | else | |||||
274 | { | |||||
275 | 0 | 0 | &$discardACallback( $ai, $bi, @_ ); | |||
276 | } | |||||
277 | } | |||||
278 | ||||||
279 | # The last entry (if any) processed was a match. | |||||
280 | # $ai and $bi point just past the last matching lines in their sequences. | |||||
281 | ||||||
282 | 0 | 0 | while ( $ai <= $lastA or $bi <= $lastB ) | |||
283 | { | |||||
284 | ||||||
285 | # last A? | |||||
286 | 0 | 0 | if ( $ai == $lastA + 1 and $bi <= $lastB ) | |||
287 | { | |||||
288 | 0 | 0 | if ( defined($finishedACallback) ) | |||
289 | { | |||||
290 | 0 | 0 | &$finishedACallback( $lastA, @_ ); | |||
291 | 0 | 0 | $finishedACallback = undef; | |||
292 | } | |||||
293 | else | |||||
294 | { | |||||
295 | 0 | 0 | &$discardBCallback( $ai, $bi++, @_ ) while $bi <= $lastB; | |||
296 | } | |||||
297 | } | |||||
298 | ||||||
299 | # last B? | |||||
300 | 0 | 0 | if ( $bi == $lastB + 1 and $ai <= $lastA ) | |||
301 | { | |||||
302 | 0 | 0 | if ( defined($finishedBCallback) ) | |||
303 | { | |||||
304 | 0 | 0 | &$finishedBCallback( $lastB, @_ ); | |||
305 | 0 | 0 | $finishedBCallback = undef; | |||
306 | } | |||||
307 | else | |||||
308 | { | |||||
309 | 0 | 0 | &$discardACallback( $ai++, $bi, @_ ) while $ai <= $lastA; | |||
310 | } | |||||
311 | } | |||||
312 | ||||||
313 | 0 | 0 | &$discardACallback( $ai++, $bi, @_ ) if $ai <= $lastA; | |||
314 | 0 | 0 | &$discardBCallback( $ai, $bi++, @_ ) if $bi <= $lastB; | |||
315 | } | |||||
316 | ||||||
317 | 0 | 0 | return 1; | |||
318 | } | |||||
319 | ||||||
320 | sub traverse_balanced | |||||
321 | { | |||||
322 | 0 | 0 | my $a = shift; # array ref | |||
323 | 0 | 0 | my $b = shift; # array ref | |||
324 | 0 | 0 | my $callbacks = shift || {}; | |||
325 | 0 | 0 | my $keyGen = shift; | |||
326 | 0 | 0 | my $matchCallback = $callbacks->{'MATCH'} || sub { }; | |||
327 | 0 | 0 | my $discardACallback = $callbacks->{'DISCARD_A'} || sub { }; | |||
328 | 0 | 0 | my $discardBCallback = $callbacks->{'DISCARD_B'} || sub { }; | |||
329 | 0 | 0 | my $changeCallback = $callbacks->{'CHANGE'}; | |||
330 | 0 | 0 | my $matchVector = _longestCommonSubsequence( $a, $b, 0, $keyGen, @_ ); | |||
331 | ||||||
332 | # Process all the lines in match vector | |||||
333 | 0 | 0 | my $lastA = $#$a; | |||
334 | 0 | 0 | my $lastB = $#$b; | |||
335 | 0 | 0 | my $bi = 0; | |||
336 | 0 | 0 | my $ai = 0; | |||
337 | 0 | 0 | my $ma = -1; | |||
338 | 0 | 0 | my $mb; | |||
339 | ||||||
340 | 0 | 0 | while (1) | |||
341 | { | |||||
342 | ||||||
343 | # Find next match indices $ma and $mb | |||||
344 | 0 | 0 | do { | |||
345 | 0 | 0 | $ma++; | |||
346 | } while( | |||||
347 | $ma <= $#$matchVector | |||||
348 | && !defined $matchVector->[$ma] | |||||
349 | ); | |||||
350 | ||||||
351 | 0 | 0 | last if $ma > $#$matchVector; # end of matchVector? | |||
352 | 0 | 0 | $mb = $matchVector->[$ma]; | |||
353 | ||||||
354 | # Proceed with discard a/b or change events until | |||||
355 | # next match | |||||
356 | 0 | 0 | while ( $ai < $ma || $bi < $mb ) | |||
357 | { | |||||
358 | ||||||
359 | 0 | 0 | if ( $ai < $ma && $bi < $mb ) | |||
360 | { | |||||
361 | ||||||
362 | # Change | |||||
363 | 0 | 0 | if ( defined $changeCallback ) | |||
364 | { | |||||
365 | 0 | 0 | &$changeCallback( $ai++, $bi++, @_ ); | |||
366 | } | |||||
367 | else | |||||
368 | { | |||||
369 | 0 | 0 | &$discardACallback( $ai++, $bi, @_ ); | |||
370 | 0 | 0 | &$discardBCallback( $ai, $bi++, @_ ); | |||
371 | } | |||||
372 | } | |||||
373 | elsif ( $ai < $ma ) | |||||
374 | { | |||||
375 | 0 | 0 | &$discardACallback( $ai++, $bi, @_ ); | |||
376 | } | |||||
377 | else | |||||
378 | { | |||||
379 | ||||||
380 | # $bi < $mb | |||||
381 | 0 | 0 | &$discardBCallback( $ai, $bi++, @_ ); | |||
382 | } | |||||
383 | } | |||||
384 | ||||||
385 | # Match | |||||
386 | 0 | 0 | &$matchCallback( $ai++, $bi++, @_ ); | |||
387 | } | |||||
388 | ||||||
389 | 0 | 0 | while ( $ai <= $lastA || $bi <= $lastB ) | |||
390 | { | |||||
391 | 0 | 0 | if ( $ai <= $lastA && $bi <= $lastB ) | |||
392 | { | |||||
393 | ||||||
394 | # Change | |||||
395 | 0 | 0 | if ( defined $changeCallback ) | |||
396 | { | |||||
397 | 0 | 0 | &$changeCallback( $ai++, $bi++, @_ ); | |||
398 | } | |||||
399 | else | |||||
400 | { | |||||
401 | 0 | 0 | &$discardACallback( $ai++, $bi, @_ ); | |||
402 | 0 | 0 | &$discardBCallback( $ai, $bi++, @_ ); | |||
403 | } | |||||
404 | } | |||||
405 | elsif ( $ai <= $lastA ) | |||||
406 | { | |||||
407 | 0 | 0 | &$discardACallback( $ai++, $bi, @_ ); | |||
408 | } | |||||
409 | else | |||||
410 | { | |||||
411 | ||||||
412 | # $bi <= $lastB | |||||
413 | 0 | 0 | &$discardBCallback( $ai, $bi++, @_ ); | |||
414 | } | |||||
415 | } | |||||
416 | ||||||
417 | 0 | 0 | return 1; | |||
418 | } | |||||
419 | ||||||
420 | sub prepare | |||||
421 | { | |||||
422 | 0 | 0 | my $a = shift; # array ref | |||
423 | 0 | 0 | my $keyGen = shift; # code ref | |||
424 | ||||||
425 | # set up code ref | |||||
426 | 0 0 | 0 0 | $keyGen = sub { $_[0] } unless defined($keyGen); | |||
427 | ||||||
428 | 0 | 0 | return scalar _withPositionsOfInInterval( $a, 0, $#$a, $keyGen, @_ ); | |||
429 | } | |||||
430 | ||||||
431 | sub LCS | |||||
432 | { | |||||
433 | 0 | 0 | my $a = shift; # array ref | |||
434 | 0 | 0 | my $b = shift; # array ref or hash ref | |||
435 | 0 | 0 | my $matchVector = _longestCommonSubsequence( $a, $b, 0, @_ ); | |||
436 | 0 | 0 | my @retval; | |||
437 | my $i; | |||||
438 | 0 | 0 | for ( $i = 0 ; $i <= $#$matchVector ; $i++ ) | |||
439 | { | |||||
440 | 0 | 0 | if ( defined( $matchVector->[$i] ) ) | |||
441 | { | |||||
442 | 0 | 0 | push ( @retval, $a->[$i] ); | |||
443 | } | |||||
444 | } | |||||
445 | 0 | 0 | return wantarray ? @retval : \@retval; | |||
446 | } | |||||
447 | ||||||
448 | sub LCS_length | |||||
449 | { | |||||
450 | 0 | 0 | my $a = shift; # array ref | |||
451 | 0 | 0 | my $b = shift; # array ref or hash ref | |||
452 | 0 | 0 | return _longestCommonSubsequence( $a, $b, 1, @_ ); | |||
453 | } | |||||
454 | ||||||
455 | sub LCSidx | |||||
456 | { | |||||
457 | 0 | 0 | my $a= shift @_; | |||
458 | 0 | 0 | my $b= shift @_; | |||
459 | 0 | 0 | my $match= _longestCommonSubsequence( $a, $b, 0, @_ ); | |||
460 | 0 | 0 | my @am= grep defined $match->[$_], 0..$#$match; | |||
461 | 0 0 | 0 0 | my @bm= @{$match}[@am]; | |||
462 | 0 | 0 | return \@am, \@bm; | |||
463 | } | |||||
464 | ||||||
465 | sub compact_diff | |||||
466 | { | |||||
467 | 0 | 0 | my $a= shift @_; | |||
468 | 0 | 0 | my $b= shift @_; | |||
469 | 0 | 0 | my( $am, $bm )= LCSidx( $a, $b, @_ ); | |||
470 | 0 | 0 | my @cdiff; | |||
471 | 0 | 0 | my( $ai, $bi )= ( 0, 0 ); | |||
472 | 0 | 0 | push @cdiff, $ai, $bi; | |||
473 | 0 | 0 | while( 1 ) { | |||
474 | 0 | 0 | while( @$am && $ai == $am->[0] && $bi == $bm->[0] ) { | |||
475 | 0 | 0 | shift @$am; | |||
476 | 0 | 0 | shift @$bm; | |||
477 | 0 | 0 | ++$ai, ++$bi; | |||
478 | } | |||||
479 | 0 | 0 | push @cdiff, $ai, $bi; | |||
480 | 0 | 0 | last if ! @$am; | |||
481 | 0 | 0 | $ai = $am->[0]; | |||
482 | 0 | 0 | $bi = $bm->[0]; | |||
483 | 0 | 0 | push @cdiff, $ai, $bi; | |||
484 | } | |||||
485 | 0 | 0 | push @cdiff, 0+@$a, 0+@$b | |||
486 | if $ai < @$a || $bi < @$b; | |||||
487 | 0 | 0 | return wantarray ? @cdiff : \@cdiff; | |||
488 | } | |||||
489 | ||||||
490 | sub diff | |||||
491 | { | |||||
492 | 0 | 0 | my $a = shift; # array ref | |||
493 | 0 | 0 | my $b = shift; # array ref | |||
494 | 0 | 0 | my $retval = []; | |||
495 | 0 | 0 | my $hunk = []; | |||
496 | my $discard = sub { | |||||
497 | 0 | 0 | push @$hunk, [ '-', $_[0], $a->[ $_[0] ] ]; | |||
498 | 0 | 0 | }; | |||
499 | my $add = sub { | |||||
500 | 0 | 0 | push @$hunk, [ '+', $_[1], $b->[ $_[1] ] ]; | |||
501 | 0 | 0 | }; | |||
502 | my $match = sub { | |||||
503 | 0 | 0 | push @$retval, $hunk | |||
504 | if 0 < @$hunk; | |||||
505 | 0 | 0 | $hunk = [] | |||
506 | 0 | 0 | }; | |||
507 | 0 | 0 | traverse_sequences( $a, $b, | |||
508 | { MATCH => $match, DISCARD_A => $discard, DISCARD_B => $add }, @_ ); | |||||
509 | 0 | 0 | &$match(); | |||
510 | 0 | 0 | return wantarray ? @$retval : $retval; | |||
511 | } | |||||
512 | ||||||
513 | sub sdiff | |||||
514 | { | |||||
515 | 0 | 0 | my $a = shift; # array ref | |||
516 | 0 | 0 | my $b = shift; # array ref | |||
517 | 0 | 0 | my $retval = []; | |||
518 | 0 0 | 0 0 | my $discard = sub { push ( @$retval, [ '-', $a->[ $_[0] ], "" ] ) }; | |||
519 | 0 0 | 0 0 | my $add = sub { push ( @$retval, [ '+', "", $b->[ $_[1] ] ] ) }; | |||
520 | my $change = sub { | |||||
521 | 0 | 0 | push ( @$retval, [ 'c', $a->[ $_[0] ], $b->[ $_[1] ] ] ); | |||
522 | 0 | 0 | }; | |||
523 | my $match = sub { | |||||
524 | 0 | 0 | push ( @$retval, [ 'u', $a->[ $_[0] ], $b->[ $_[1] ] ] ); | |||
525 | 0 | 0 | }; | |||
526 | 0 | 0 | traverse_balanced( | |||
527 | $a, | |||||
528 | $b, | |||||
529 | { | |||||
530 | MATCH => $match, | |||||
531 | DISCARD_A => $discard, | |||||
532 | DISCARD_B => $add, | |||||
533 | CHANGE => $change, | |||||
534 | }, | |||||
535 | @_ | |||||
536 | ); | |||||
537 | 0 | 0 | return wantarray ? @$retval : $retval; | |||
538 | } | |||||
539 | ||||||
540 | ######################################## | |||||
541 | my $Root= __PACKAGE__; | |||||
542 | package Algorithm::Diff::_impl; | |||||
543 | 6 6 6 | 17 3 3181 | use strict; | |||
544 | ||||||
545 | sub _Idx() { 0 } # $me->[_Idx]: Ref to array of hunk indices | |||||
546 | # 1 # $me->[1]: Ref to first sequence | |||||
547 | # 2 # $me->[2]: Ref to second sequence | |||||
548 | sub _End() { 3 } # $me->[_End]: Diff between forward and reverse pos | |||||
549 | sub _Same() { 4 } # $me->[_Same]: 1 if pos 1 contains unchanged items | |||||
550 | sub _Base() { 5 } # $me->[_Base]: Added to range's min and max | |||||
551 | sub _Pos() { 6 } # $me->[_Pos]: Which hunk is currently selected | |||||
552 | sub _Off() { 7 } # $me->[_Off]: Offset into _Idx for current position | |||||
553 | sub _Min() { -2 } # Added to _Off to get min instead of max+1 | |||||
554 | ||||||
555 | sub Die | |||||
556 | { | |||||
557 | 0 | 0 | require Carp; | |||
558 | 0 | 0 | Carp::confess( @_ ); | |||
559 | } | |||||
560 | ||||||
561 | sub _ChkPos | |||||
562 | { | |||||
563 | 0 | 0 | my( $me )= @_; | |||
564 | 0 | 0 | return if $me->[_Pos]; | |||
565 | 0 | 0 | my $meth= ( caller(1) )[3]; | |||
566 | 0 | 0 | Die( "Called $meth on 'reset' object" ); | |||
567 | } | |||||
568 | ||||||
569 | sub _ChkSeq | |||||
570 | { | |||||
571 | 0 | 0 | my( $me, $seq )= @_; | |||
572 | 0 | 0 | return $seq + $me->[_Off] | |||
573 | if 1 == $seq || 2 == $seq; | |||||
574 | 0 | 0 | my $meth= ( caller(1) )[3]; | |||
575 | 0 | 0 | Die( "$meth: Invalid sequence number ($seq); must be 1 or 2" ); | |||
576 | } | |||||
577 | ||||||
578 | sub getObjPkg | |||||
579 | { | |||||
580 | 6 | 4 | my( $us )= @_; | |||
581 | 6 | 13 | return ref $us if ref $us; | |||
582 | 6 | 8 | return $us . "::_obj"; | |||
583 | } | |||||
584 | ||||||
585 | sub new | |||||
586 | { | |||||
587 | 0 | my( $us, $seq1, $seq2, $opts ) = @_; | ||||
588 | 0 | my @args; | ||||
589 | 0 | for( $opts->{keyGen} ) { | ||||
590 | 0 | push @args, $_ if $_; | ||||
591 | } | |||||
592 | 0 | for( $opts->{keyGenArgs} ) { | ||||
593 | 0 | push @args, @$_ if $_; | ||||
594 | } | |||||
595 | 0 | my $cdif= Algorithm::Diff::compact_diff( $seq1, $seq2, @args ); | ||||
596 | 0 | my $same= 1; | ||||
597 | 0 | if( 0 == $cdif->[2] && 0 == $cdif->[3] ) { | ||||
598 | 0 | $same= 0; | ||||
599 | 0 | splice @$cdif, 0, 2; | ||||
600 | } | |||||
601 | 0 | my @obj= ( $cdif, $seq1, $seq2 ); | ||||
602 | 0 | $obj[_End] = (1+@$cdif)/2; | ||||
603 | 0 | $obj[_Same] = $same; | ||||
604 | 0 | $obj[_Base] = 0; | ||||
605 | 0 | my $me = bless \@obj, $us->getObjPkg(); | ||||
606 | 0 | $me->Reset( 0 ); | ||||
607 | 0 | return $me; | ||||
608 | } | |||||
609 | ||||||
610 | sub Reset | |||||
611 | { | |||||
612 | 0 | my( $me, $pos )= @_; | ||||
613 | 0 | $pos= int( $pos || 0 ); | ||||
614 | 0 | $pos += $me->[_End] | ||||
615 | if $pos < 0; | |||||
616 | 0 | $pos= 0 | ||||
617 | if $pos < 0 || $me->[_End] <= $pos; | |||||
618 | 0 | $me->[_Pos]= $pos || !1; | ||||
619 | 0 | $me->[_Off]= 2*$pos - 1; | ||||
620 | 0 | return $me; | ||||
621 | } | |||||
622 | ||||||
623 | sub Base | |||||
624 | { | |||||
625 | 0 | my( $me, $base )= @_; | ||||
626 | 0 | my $oldBase= $me->[_Base]; | ||||
627 | 0 | $me->[_Base]= 0+$base if defined $base; | ||||
628 | 0 | return $oldBase; | ||||
629 | } | |||||
630 | ||||||
631 | sub Copy | |||||
632 | { | |||||
633 | 0 | my( $me, $pos, $base )= @_; | ||||
634 | 0 | my @obj= @$me; | ||||
635 | 0 | my $you= bless \@obj, ref($me); | ||||
636 | 0 | $you->Reset( $pos ) if defined $pos; | ||||
637 | 0 | $you->Base( $base ); | ||||
638 | 0 | return $you; | ||||
639 | } | |||||
640 | ||||||
641 | sub Next { | |||||
642 | 0 | my( $me, $steps )= @_; | ||||
643 | 0 | $steps= 1 if ! defined $steps; | ||||
644 | 0 | if( $steps ) { | ||||
645 | 0 | my $pos= $me->[_Pos]; | ||||
646 | 0 | my $new= $pos + $steps; | ||||
647 | 0 | $new= 0 if $pos && $new < 0; | ||||
648 | 0 | $me->Reset( $new ) | ||||
649 | } | |||||
650 | 0 | return $me->[_Pos]; | ||||
651 | } | |||||
652 | ||||||
653 | sub Prev { | |||||
654 | 0 | my( $me, $steps )= @_; | ||||
655 | 0 | $steps= 1 if ! defined $steps; | ||||
656 | 0 | my $pos= $me->Next(-$steps); | ||||
657 | 0 | $pos -= $me->[_End] if $pos; | ||||
658 | 0 | return $pos; | ||||
659 | } | |||||
660 | ||||||
661 | sub Diff { | |||||
662 | 0 | my( $me )= @_; | ||||
663 | 0 | $me->_ChkPos(); | ||||
664 | 0 | return 0 if $me->[_Same] == ( 1 & $me->[_Pos] ); | ||||
665 | 0 | my $ret= 0; | ||||
666 | 0 | my $off= $me->[_Off]; | ||||
667 | 0 | for my $seq ( 1, 2 ) { | ||||
668 | 0 | $ret |= $seq | ||||
669 | if $me->[_Idx][ $off + $seq + _Min ] | |||||
670 | < $me->[_Idx][ $off + $seq ]; | |||||
671 | } | |||||
672 | 0 | return $ret; | ||||
673 | } | |||||
674 | ||||||
675 | sub Min { | |||||
676 | 0 | my( $me, $seq, $base )= @_; | ||||
677 | 0 | $me->_ChkPos(); | ||||
678 | 0 | my $off= $me->_ChkSeq($seq); | ||||
679 | 0 | $base= $me->[_Base] if !defined $base; | ||||
680 | 0 | return $base + $me->[_Idx][ $off + _Min ]; | ||||
681 | } | |||||
682 | ||||||
683 | sub Max { | |||||
684 | 0 | my( $me, $seq, $base )= @_; | ||||
685 | 0 | $me->_ChkPos(); | ||||
686 | 0 | my $off= $me->_ChkSeq($seq); | ||||
687 | 0 | $base= $me->[_Base] if !defined $base; | ||||
688 | 0 | return $base + $me->[_Idx][ $off ] -1; | ||||
689 | } | |||||
690 | ||||||
691 | sub Range { | |||||
692 | 0 | my( $me, $seq, $base )= @_; | ||||
693 | 0 | $me->_ChkPos(); | ||||
694 | 0 | my $off = $me->_ChkSeq($seq); | ||||
695 | 0 | if( !wantarray ) { | ||||
696 | 0 | return $me->[_Idx][ $off ] | ||||
697 | - $me->[_Idx][ $off + _Min ]; | |||||
698 | } | |||||
699 | 0 | $base= $me->[_Base] if !defined $base; | ||||
700 | 0 | return ( $base + $me->[_Idx][ $off + _Min ] ) | ||||
701 | .. ( $base + $me->[_Idx][ $off ] - 1 ); | |||||
702 | } | |||||
703 | ||||||
704 | sub Items { | |||||
705 | 0 | my( $me, $seq )= @_; | ||||
706 | 0 | $me->_ChkPos(); | ||||
707 | 0 | my $off = $me->_ChkSeq($seq); | ||||
708 | 0 | if( !wantarray ) { | ||||
709 | 0 | return $me->[_Idx][ $off ] | ||||
710 | - $me->[_Idx][ $off + _Min ]; | |||||
711 | } | |||||
712 | return | |||||
713 | 0 0 | @{$me->[$seq]}[ | ||||
714 | $me->[_Idx][ $off + _Min ] | |||||
715 | .. ( $me->[_Idx][ $off ] - 1 ) | |||||
716 | ]; | |||||
717 | } | |||||
718 | ||||||
719 | sub Same { | |||||
720 | 0 | my( $me )= @_; | ||||
721 | 0 | $me->_ChkPos(); | ||||
722 | 0 | return wantarray ? () : 0 | ||||
723 | if $me->[_Same] != ( 1 & $me->[_Pos] ); | |||||
724 | 0 | return $me->Items(1); | ||||
725 | } | |||||
726 | ||||||
727 | my %getName; | |||||
728 | BEGIN { | |||||
729 | 6 | 759 | %getName= ( | |||
730 | same => \&Same, | |||||
731 | diff => \&Diff, | |||||
732 | base => \&Base, | |||||
733 | min => \&Min, | |||||
734 | max => \&Max, | |||||
735 | range=> \&Range, | |||||
736 | items=> \&Items, # same thing | |||||
737 | ); | |||||
738 | } | |||||
739 | ||||||
740 | sub Get | |||||
741 | { | |||||
742 | 0 | my $me= shift @_; | ||||
743 | 0 | $me->_ChkPos(); | ||||
744 | 0 | my @value; | ||||
745 | 0 | for my $arg ( @_ ) { | ||||
746 | 0 | for my $word ( split ' ', $arg ) { | ||||
747 | 0 | my $meth; | ||||
748 | 0 | if( $word !~ /^(-?\d+)?([a-zA-Z]+)([12])?$/ | ||||
749 | || not $meth= $getName{ lc $2 } | |||||
750 | ) { | |||||
751 | 0 | Die( $Root, ", Get: Invalid request ($word)" ); | ||||
752 | } | |||||
753 | 0 | my( $base, $name, $seq )= ( $1, $2, $3 ); | ||||
754 | 0 | push @value, scalar( | ||||
755 | 4 == length($name) | |||||
756 | ? $meth->( $me ) | |||||
757 | : $meth->( $me, $seq, $base ) | |||||
758 | ); | |||||
759 | } | |||||
760 | } | |||||
761 | 0 | if( wantarray ) { | ||||
762 | 0 | return @value; | ||||
763 | } elsif( 1 == @value ) { | |||||
764 | 0 | return $value[0]; | ||||
765 | } | |||||
766 | 0 | Die( 0+@value, " values requested from ", | ||||
767 | $Root, "'s Get in scalar context" ); | |||||
768 | } | |||||
769 | ||||||
770 | ||||||
771 | my $Obj= getObjPkg($Root); | |||||
772 | 6 6 6 | 21 6 300 | no strict 'refs'; | |||
773 | ||||||
774 | for my $meth ( qw( new getObjPkg ) ) { | |||||
775 | *{$Root."::".$meth} = \&{$meth}; | |||||
776 | *{$Obj ."::".$meth} = \&{$meth}; | |||||
777 | } | |||||
778 | for my $meth ( qw( | |||||
779 | Next Prev Reset Copy Base Diff | |||||
780 | Same Items Range Min Max Get | |||||
781 | _ChkPos _ChkSeq | |||||
782 | ) ) { | |||||
783 | *{$Obj."::".$meth} = \&{$meth}; | |||||
784 | } | |||||
785 | ||||||
786 | 1; |