| File: | local/lib/perl5/Algorithm/Diff.pm |
| Coverage: | 4.3% |
| line | stmt | bran | cond | sub | time | code |
|---|---|---|---|---|---|---|
| 1 | package Algorithm::Diff; | |||||
| 2 | # Skip to first "=head" line for documentation. | |||||
| 3 | 6 6 6 | 12 4 80 | use strict; | |||
| 4 | ||||||
| 5 | 6 6 6 | 850 30 12 | use integer; # see below in _replaceNextLargerWith() for mod to make | |||
| 6 | # if you don't use this | |||||
| 7 | 6 6 6 | 82 4 5791 | use vars qw( $VERSION @EXPORT_OK ); | |||
| 8 | $VERSION = 1.19_02; | |||||
| 9 | # ^ ^^ ^^-- Incremented at will | |||||
| 10 | # | \+----- Incremented for non-trivial changes to features | |||||
| 11 | # \-------- Incremented for fundamental changes | |||||
| 12 | require Exporter; | |||||
| 13 | *import = \&Exporter::import; | |||||
| 14 | @EXPORT_OK = qw( | |||||
| 15 | prepare LCS LCSidx LCS_length | |||||
| 16 | diff sdiff compact_diff | |||||
| 17 | traverse_sequences traverse_balanced | |||||
| 18 | ); | |||||
| 19 | ||||||
| 20 | # McIlroy-Hunt diff algorithm | |||||
| 21 | # Adapted from the Smalltalk code of Mario I. Wolczko, <mario@wolczko.com> | |||||
| 22 | # by Ned Konz, perl@bike-nomad.com | |||||
| 23 | # Updates by Tye McQueen, http://perlmonks.org/?node=tye | |||||
| 24 | ||||||
| 25 | # Create a hash that maps each element of $aCollection to the set of | |||||
| 26 | # positions it occupies in $aCollection, restricted to the elements | |||||
| 27 | # within the range of indexes specified by $start and $end. | |||||
| 28 | # The fourth parameter is a subroutine reference that will be called to | |||||
| 29 | # generate a string to use as a key. | |||||
| 30 | # Additional parameters, if any, will be passed to this subroutine. | |||||
| 31 | # | |||||
| 32 | # my $hashRef = _withPositionsOfInInterval( \@array, $start, $end, $keyGen ); | |||||
| 33 | ||||||
| 34 | sub _withPositionsOfInInterval | |||||
| 35 | { | |||||
| 36 | 0 | 0 | my $aCollection = shift; # array ref | |||
| 37 | 0 | 0 | my $start = shift; | |||
| 38 | 0 | 0 | my $end = shift; | |||
| 39 | 0 | 0 | my $keyGen = shift; | |||
| 40 | 0 | 0 | my %d; | |||
| 41 | my $index; | |||||
| 42 | 0 | 0 | for ( $index = $start ; $index <= $end ; $index++ ) | |||
| 43 | { | |||||
| 44 | 0 | 0 | my $element = $aCollection->[$index]; | |||
| 45 | 0 | 0 | my $key = &$keyGen( $element, @_ ); | |||
| 46 | 0 | 0 | if ( exists( $d{$key} ) ) | |||
| 47 | { | |||||
| 48 | 0 0 | 0 0 | unshift ( @{ $d{$key} }, $index ); | |||
| 49 | } | |||||
| 50 | else | |||||
| 51 | { | |||||
| 52 | 0 | 0 | $d{$key} = [$index]; | |||
| 53 | } | |||||
| 54 | } | |||||
| 55 | 0 | 0 | return wantarray ? %d : \%d; | |||
| 56 | } | |||||
| 57 | ||||||
| 58 | # Find the place at which aValue would normally be inserted into the | |||||
| 59 | # array. If that place is already occupied by aValue, do nothing, and | |||||
| 60 | # return undef. If the place does not exist (i.e., it is off the end of | |||||
| 61 | # the array), add it to the end, otherwise replace the element at that | |||||
| 62 | # point with aValue. It is assumed that the array's values are numeric. | |||||
| 63 | # This is where the bulk (75%) of the time is spent in this module, so | |||||
| 64 | # try to make it fast! | |||||
| 65 | ||||||
| 66 | sub _replaceNextLargerWith | |||||
| 67 | { | |||||
| 68 | 0 | 0 | my ( $array, $aValue, $high ) = @_; | |||
| 69 | 0 | 0 | $high ||= $#$array; | |||
| 70 | ||||||
| 71 | # off the end? | |||||
| 72 | 0 | 0 | if ( $high == -1 || $aValue > $array->[-1] ) | |||
| 73 | { | |||||
| 74 | 0 | 0 | push ( @$array, $aValue ); | |||
| 75 | 0 | 0 | return $high + 1; | |||
| 76 | } | |||||
| 77 | ||||||
| 78 | # binary search for insertion point... | |||||
| 79 | 0 | 0 | my $low = 0; | |||
| 80 | 0 | 0 | my $index; | |||
| 81 | my $found; | |||||
| 82 | 0 | 0 | while ( $low <= $high ) | |||
| 83 | { | |||||
| 84 | 0 | 0 | $index = ( $high + $low ) / 2; | |||
| 85 | ||||||
| 86 | # $index = int(( $high + $low ) / 2); # without 'use integer' | |||||
| 87 | 0 | 0 | $found = $array->[$index]; | |||
| 88 | ||||||
| 89 | 0 | 0 | if ( $aValue == $found ) | |||
| 90 | { | |||||
| 91 | 0 | 0 | return undef; | |||
| 92 | } | |||||
| 93 | elsif ( $aValue > $found ) | |||||
| 94 | { | |||||
| 95 | 0 | 0 | $low = $index + 1; | |||
| 96 | } | |||||
| 97 | else | |||||
| 98 | { | |||||
| 99 | 0 | 0 | $high = $index - 1; | |||
| 100 | } | |||||
| 101 | } | |||||
| 102 | ||||||
| 103 | # now insertion point is in $low. | |||||
| 104 | 0 | 0 | $array->[$low] = $aValue; # overwrite next larger | |||
| 105 | 0 | 0 | return $low; | |||
| 106 | } | |||||
| 107 | ||||||
| 108 | # This method computes the longest common subsequence in $a and $b. | |||||
| 109 | ||||||
| 110 | # Result is array or ref, whose contents is such that | |||||
| 111 | # $a->[ $i ] == $b->[ $result[ $i ] ] | |||||
| 112 | # foreach $i in ( 0 .. $#result ) if $result[ $i ] is defined. | |||||
| 113 | ||||||
| 114 | # An additional argument may be passed; this is a hash or key generating | |||||
| 115 | # function that should return a string that uniquely identifies the given | |||||
| 116 | # element. It should be the case that if the key is the same, the elements | |||||
| 117 | # will compare the same. If this parameter is undef or missing, the key | |||||
| 118 | # will be the element as a string. | |||||
| 119 | ||||||
| 120 | # By default, comparisons will use "eq" and elements will be turned into keys | |||||
| 121 | # using the default stringizing operator '""'. | |||||
| 122 | ||||||
| 123 | # Additional parameters, if any, will be passed to the key generation | |||||
| 124 | # routine. | |||||
| 125 | ||||||
| 126 | sub _longestCommonSubsequence | |||||
| 127 | { | |||||
| 128 | 0 | 0 | my $a = shift; # array ref or hash ref | |||
| 129 | 0 | 0 | my $b = shift; # array ref or hash ref | |||
| 130 | 0 | 0 | my $counting = shift; # scalar | |||
| 131 | 0 | 0 | my $keyGen = shift; # code ref | |||
| 132 | 0 | 0 | my $compare; # code ref | |||
| 133 | ||||||
| 134 | 0 | 0 | if ( ref($a) eq 'HASH' ) | |||
| 135 | { # prepared hash must be in $b | |||||
| 136 | 0 | 0 | my $tmp = $b; | |||
| 137 | 0 | 0 | $b = $a; | |||
| 138 | 0 | 0 | $a = $tmp; | |||
| 139 | } | |||||
| 140 | ||||||
| 141 | # Check for bogus (non-ref) argument values | |||||
| 142 | 0 | 0 | if ( !ref($a) || !ref($b) ) | |||
| 143 | { | |||||
| 144 | 0 | 0 | my @callerInfo = caller(1); | |||
| 145 | 0 | 0 | die 'error: must pass array or hash references to ' . $callerInfo[3]; | |||
| 146 | } | |||||
| 147 | ||||||
| 148 | # set up code refs | |||||
| 149 | # Note that these are optimized. | |||||
| 150 | 0 | 0 | if ( !defined($keyGen) ) # optimize for strings | |||
| 151 | { | |||||
| 152 | 0 0 | 0 0 | $keyGen = sub { $_[0] }; | |||
| 153 | 0 0 0 | 0 0 0 | $compare = sub { my ( $a, $b ) = @_; $a eq $b }; | |||
| 154 | } | |||||
| 155 | else | |||||
| 156 | { | |||||
| 157 | $compare = sub { | |||||
| 158 | 0 | 0 | my $a = shift; | |||
| 159 | 0 | 0 | my $b = shift; | |||
| 160 | 0 | 0 | &$keyGen( $a, @_ ) eq &$keyGen( $b, @_ ); | |||
| 161 | 0 | 0 | }; | |||
| 162 | } | |||||
| 163 | ||||||
| 164 | 0 | 0 | my ( $aStart, $aFinish, $matchVector ) = ( 0, $#$a, [] ); | |||
| 165 | 0 | 0 | my ( $prunedCount, $bMatches ) = ( 0, {} ); | |||
| 166 | ||||||
| 167 | 0 | 0 | if ( ref($b) eq 'HASH' ) # was $bMatches prepared for us? | |||
| 168 | { | |||||
| 169 | 0 | 0 | $bMatches = $b; | |||
| 170 | } | |||||
| 171 | else | |||||
| 172 | { | |||||
| 173 | 0 | 0 | my ( $bStart, $bFinish ) = ( 0, $#$b ); | |||
| 174 | ||||||
| 175 | # First we prune off any common elements at the beginning | |||||
| 176 | 0 | 0 | while ( $aStart <= $aFinish | |||
| 177 | and $bStart <= $bFinish | |||||
| 178 | and &$compare( $a->[$aStart], $b->[$bStart], @_ ) ) | |||||
| 179 | { | |||||
| 180 | 0 | 0 | $matchVector->[ $aStart++ ] = $bStart++; | |||
| 181 | 0 | 0 | $prunedCount++; | |||
| 182 | } | |||||
| 183 | ||||||
| 184 | # now the end | |||||
| 185 | 0 | 0 | while ( $aStart <= $aFinish | |||
| 186 | and $bStart <= $bFinish | |||||
| 187 | and &$compare( $a->[$aFinish], $b->[$bFinish], @_ ) ) | |||||
| 188 | { | |||||
| 189 | 0 | 0 | $matchVector->[ $aFinish-- ] = $bFinish--; | |||
| 190 | 0 | 0 | $prunedCount++; | |||
| 191 | } | |||||
| 192 | ||||||
| 193 | # Now compute the equivalence classes of positions of elements | |||||
| 194 | $bMatches = | |||||
| 195 | 0 | 0 | _withPositionsOfInInterval( $b, $bStart, $bFinish, $keyGen, @_ ); | |||
| 196 | } | |||||
| 197 | 0 | 0 | my $thresh = []; | |||
| 198 | 0 | 0 | my $links = []; | |||
| 199 | ||||||
| 200 | 0 | 0 | my ( $i, $ai, $j, $k ); | |||
| 201 | 0 | 0 | for ( $i = $aStart ; $i <= $aFinish ; $i++ ) | |||
| 202 | { | |||||
| 203 | 0 | 0 | $ai = &$keyGen( $a->[$i], @_ ); | |||
| 204 | 0 | 0 | if ( exists( $bMatches->{$ai} ) ) | |||
| 205 | { | |||||
| 206 | 0 | 0 | $k = 0; | |||
| 207 | 0 0 | 0 0 | for $j ( @{ $bMatches->{$ai} } ) | |||
| 208 | { | |||||
| 209 | ||||||
| 210 | # optimization: most of the time this will be true | |||||
| 211 | 0 | 0 | if ( $k and $thresh->[$k] > $j and $thresh->[ $k - 1 ] < $j ) | |||
| 212 | { | |||||
| 213 | 0 | 0 | $thresh->[$k] = $j; | |||
| 214 | } | |||||
| 215 | else | |||||
| 216 | { | |||||
| 217 | 0 | 0 | $k = _replaceNextLargerWith( $thresh, $j, $k ); | |||
| 218 | } | |||||
| 219 | ||||||
| 220 | # oddly, it's faster to always test this (CPU cache?). | |||||
| 221 | 0 | 0 | if ( defined($k) ) | |||
| 222 | { | |||||
| 223 | 0 | 0 | $links->[$k] = | |||
| 224 | [ ( $k ? $links->[ $k - 1 ] : undef ), $i, $j ]; | |||||
| 225 | } | |||||
| 226 | } | |||||
| 227 | } | |||||
| 228 | } | |||||
| 229 | ||||||
| 230 | 0 | 0 | if (@$thresh) | |||
| 231 | { | |||||
| 232 | 0 | 0 | return $prunedCount + @$thresh if $counting; | |||
| 233 | 0 | 0 | for ( my $link = $links->[$#$thresh] ; $link ; $link = $link->[0] ) | |||
| 234 | { | |||||
| 235 | 0 | 0 | $matchVector->[ $link->[1] ] = $link->[2]; | |||
| 236 | } | |||||
| 237 | } | |||||
| 238 | elsif ($counting) | |||||
| 239 | { | |||||
| 240 | 0 | 0 | return $prunedCount; | |||
| 241 | } | |||||
| 242 | ||||||
| 243 | 0 | 0 | return wantarray ? @$matchVector : $matchVector; | |||
| 244 | } | |||||
| 245 | ||||||
| 246 | sub traverse_sequences | |||||
| 247 | { | |||||
| 248 | 0 | 0 | my $a = shift; # array ref | |||
| 249 | 0 | 0 | my $b = shift; # array ref | |||
| 250 | 0 | 0 | my $callbacks = shift || {}; | |||
| 251 | 0 | 0 | my $keyGen = shift; | |||
| 252 | 0 | 0 | my $matchCallback = $callbacks->{'MATCH'} || sub { }; | |||
| 253 | 0 | 0 | my $discardACallback = $callbacks->{'DISCARD_A'} || sub { }; | |||
| 254 | 0 | 0 | my $finishedACallback = $callbacks->{'A_FINISHED'}; | |||
| 255 | 0 | 0 | my $discardBCallback = $callbacks->{'DISCARD_B'} || sub { }; | |||
| 256 | 0 | 0 | my $finishedBCallback = $callbacks->{'B_FINISHED'}; | |||
| 257 | 0 | 0 | my $matchVector = _longestCommonSubsequence( $a, $b, 0, $keyGen, @_ ); | |||
| 258 | ||||||
| 259 | # Process all the lines in @$matchVector | |||||
| 260 | 0 | 0 | my $lastA = $#$a; | |||
| 261 | 0 | 0 | my $lastB = $#$b; | |||
| 262 | 0 | 0 | my $bi = 0; | |||
| 263 | 0 | 0 | my $ai; | |||
| 264 | ||||||
| 265 | 0 | 0 | for ( $ai = 0 ; $ai <= $#$matchVector ; $ai++ ) | |||
| 266 | { | |||||
| 267 | 0 | 0 | my $bLine = $matchVector->[$ai]; | |||
| 268 | 0 | 0 | if ( defined($bLine) ) # matched | |||
| 269 | { | |||||
| 270 | 0 | 0 | &$discardBCallback( $ai, $bi++, @_ ) while $bi < $bLine; | |||
| 271 | 0 | 0 | &$matchCallback( $ai, $bi++, @_ ); | |||
| 272 | } | |||||
| 273 | else | |||||
| 274 | { | |||||
| 275 | 0 | 0 | &$discardACallback( $ai, $bi, @_ ); | |||
| 276 | } | |||||
| 277 | } | |||||
| 278 | ||||||
| 279 | # The last entry (if any) processed was a match. | |||||
| 280 | # $ai and $bi point just past the last matching lines in their sequences. | |||||
| 281 | ||||||
| 282 | 0 | 0 | while ( $ai <= $lastA or $bi <= $lastB ) | |||
| 283 | { | |||||
| 284 | ||||||
| 285 | # last A? | |||||
| 286 | 0 | 0 | if ( $ai == $lastA + 1 and $bi <= $lastB ) | |||
| 287 | { | |||||
| 288 | 0 | 0 | if ( defined($finishedACallback) ) | |||
| 289 | { | |||||
| 290 | 0 | 0 | &$finishedACallback( $lastA, @_ ); | |||
| 291 | 0 | 0 | $finishedACallback = undef; | |||
| 292 | } | |||||
| 293 | else | |||||
| 294 | { | |||||
| 295 | 0 | 0 | &$discardBCallback( $ai, $bi++, @_ ) while $bi <= $lastB; | |||
| 296 | } | |||||
| 297 | } | |||||
| 298 | ||||||
| 299 | # last B? | |||||
| 300 | 0 | 0 | if ( $bi == $lastB + 1 and $ai <= $lastA ) | |||
| 301 | { | |||||
| 302 | 0 | 0 | if ( defined($finishedBCallback) ) | |||
| 303 | { | |||||
| 304 | 0 | 0 | &$finishedBCallback( $lastB, @_ ); | |||
| 305 | 0 | 0 | $finishedBCallback = undef; | |||
| 306 | } | |||||
| 307 | else | |||||
| 308 | { | |||||
| 309 | 0 | 0 | &$discardACallback( $ai++, $bi, @_ ) while $ai <= $lastA; | |||
| 310 | } | |||||
| 311 | } | |||||
| 312 | ||||||
| 313 | 0 | 0 | &$discardACallback( $ai++, $bi, @_ ) if $ai <= $lastA; | |||
| 314 | 0 | 0 | &$discardBCallback( $ai, $bi++, @_ ) if $bi <= $lastB; | |||
| 315 | } | |||||
| 316 | ||||||
| 317 | 0 | 0 | return 1; | |||
| 318 | } | |||||
| 319 | ||||||
| 320 | sub traverse_balanced | |||||
| 321 | { | |||||
| 322 | 0 | 0 | my $a = shift; # array ref | |||
| 323 | 0 | 0 | my $b = shift; # array ref | |||
| 324 | 0 | 0 | my $callbacks = shift || {}; | |||
| 325 | 0 | 0 | my $keyGen = shift; | |||
| 326 | 0 | 0 | my $matchCallback = $callbacks->{'MATCH'} || sub { }; | |||
| 327 | 0 | 0 | my $discardACallback = $callbacks->{'DISCARD_A'} || sub { }; | |||
| 328 | 0 | 0 | my $discardBCallback = $callbacks->{'DISCARD_B'} || sub { }; | |||
| 329 | 0 | 0 | my $changeCallback = $callbacks->{'CHANGE'}; | |||
| 330 | 0 | 0 | my $matchVector = _longestCommonSubsequence( $a, $b, 0, $keyGen, @_ ); | |||
| 331 | ||||||
| 332 | # Process all the lines in match vector | |||||
| 333 | 0 | 0 | my $lastA = $#$a; | |||
| 334 | 0 | 0 | my $lastB = $#$b; | |||
| 335 | 0 | 0 | my $bi = 0; | |||
| 336 | 0 | 0 | my $ai = 0; | |||
| 337 | 0 | 0 | my $ma = -1; | |||
| 338 | 0 | 0 | my $mb; | |||
| 339 | ||||||
| 340 | 0 | 0 | while (1) | |||
| 341 | { | |||||
| 342 | ||||||
| 343 | # Find next match indices $ma and $mb | |||||
| 344 | 0 | 0 | do { | |||
| 345 | 0 | 0 | $ma++; | |||
| 346 | } while( | |||||
| 347 | $ma <= $#$matchVector | |||||
| 348 | && !defined $matchVector->[$ma] | |||||
| 349 | ); | |||||
| 350 | ||||||
| 351 | 0 | 0 | last if $ma > $#$matchVector; # end of matchVector? | |||
| 352 | 0 | 0 | $mb = $matchVector->[$ma]; | |||
| 353 | ||||||
| 354 | # Proceed with discard a/b or change events until | |||||
| 355 | # next match | |||||
| 356 | 0 | 0 | while ( $ai < $ma || $bi < $mb ) | |||
| 357 | { | |||||
| 358 | ||||||
| 359 | 0 | 0 | if ( $ai < $ma && $bi < $mb ) | |||
| 360 | { | |||||
| 361 | ||||||
| 362 | # Change | |||||
| 363 | 0 | 0 | if ( defined $changeCallback ) | |||
| 364 | { | |||||
| 365 | 0 | 0 | &$changeCallback( $ai++, $bi++, @_ ); | |||
| 366 | } | |||||
| 367 | else | |||||
| 368 | { | |||||
| 369 | 0 | 0 | &$discardACallback( $ai++, $bi, @_ ); | |||
| 370 | 0 | 0 | &$discardBCallback( $ai, $bi++, @_ ); | |||
| 371 | } | |||||
| 372 | } | |||||
| 373 | elsif ( $ai < $ma ) | |||||
| 374 | { | |||||
| 375 | 0 | 0 | &$discardACallback( $ai++, $bi, @_ ); | |||
| 376 | } | |||||
| 377 | else | |||||
| 378 | { | |||||
| 379 | ||||||
| 380 | # $bi < $mb | |||||
| 381 | 0 | 0 | &$discardBCallback( $ai, $bi++, @_ ); | |||
| 382 | } | |||||
| 383 | } | |||||
| 384 | ||||||
| 385 | # Match | |||||
| 386 | 0 | 0 | &$matchCallback( $ai++, $bi++, @_ ); | |||
| 387 | } | |||||
| 388 | ||||||
| 389 | 0 | 0 | while ( $ai <= $lastA || $bi <= $lastB ) | |||
| 390 | { | |||||
| 391 | 0 | 0 | if ( $ai <= $lastA && $bi <= $lastB ) | |||
| 392 | { | |||||
| 393 | ||||||
| 394 | # Change | |||||
| 395 | 0 | 0 | if ( defined $changeCallback ) | |||
| 396 | { | |||||
| 397 | 0 | 0 | &$changeCallback( $ai++, $bi++, @_ ); | |||
| 398 | } | |||||
| 399 | else | |||||
| 400 | { | |||||
| 401 | 0 | 0 | &$discardACallback( $ai++, $bi, @_ ); | |||
| 402 | 0 | 0 | &$discardBCallback( $ai, $bi++, @_ ); | |||
| 403 | } | |||||
| 404 | } | |||||
| 405 | elsif ( $ai <= $lastA ) | |||||
| 406 | { | |||||
| 407 | 0 | 0 | &$discardACallback( $ai++, $bi, @_ ); | |||
| 408 | } | |||||
| 409 | else | |||||
| 410 | { | |||||
| 411 | ||||||
| 412 | # $bi <= $lastB | |||||
| 413 | 0 | 0 | &$discardBCallback( $ai, $bi++, @_ ); | |||
| 414 | } | |||||
| 415 | } | |||||
| 416 | ||||||
| 417 | 0 | 0 | return 1; | |||
| 418 | } | |||||
| 419 | ||||||
| 420 | sub prepare | |||||
| 421 | { | |||||
| 422 | 0 | 0 | my $a = shift; # array ref | |||
| 423 | 0 | 0 | my $keyGen = shift; # code ref | |||
| 424 | ||||||
| 425 | # set up code ref | |||||
| 426 | 0 0 | 0 0 | $keyGen = sub { $_[0] } unless defined($keyGen); | |||
| 427 | ||||||
| 428 | 0 | 0 | return scalar _withPositionsOfInInterval( $a, 0, $#$a, $keyGen, @_ ); | |||
| 429 | } | |||||
| 430 | ||||||
| 431 | sub LCS | |||||
| 432 | { | |||||
| 433 | 0 | 0 | my $a = shift; # array ref | |||
| 434 | 0 | 0 | my $b = shift; # array ref or hash ref | |||
| 435 | 0 | 0 | my $matchVector = _longestCommonSubsequence( $a, $b, 0, @_ ); | |||
| 436 | 0 | 0 | my @retval; | |||
| 437 | my $i; | |||||
| 438 | 0 | 0 | for ( $i = 0 ; $i <= $#$matchVector ; $i++ ) | |||
| 439 | { | |||||
| 440 | 0 | 0 | if ( defined( $matchVector->[$i] ) ) | |||
| 441 | { | |||||
| 442 | 0 | 0 | push ( @retval, $a->[$i] ); | |||
| 443 | } | |||||
| 444 | } | |||||
| 445 | 0 | 0 | return wantarray ? @retval : \@retval; | |||
| 446 | } | |||||
| 447 | ||||||
| 448 | sub LCS_length | |||||
| 449 | { | |||||
| 450 | 0 | 0 | my $a = shift; # array ref | |||
| 451 | 0 | 0 | my $b = shift; # array ref or hash ref | |||
| 452 | 0 | 0 | return _longestCommonSubsequence( $a, $b, 1, @_ ); | |||
| 453 | } | |||||
| 454 | ||||||
| 455 | sub LCSidx | |||||
| 456 | { | |||||
| 457 | 0 | 0 | my $a= shift @_; | |||
| 458 | 0 | 0 | my $b= shift @_; | |||
| 459 | 0 | 0 | my $match= _longestCommonSubsequence( $a, $b, 0, @_ ); | |||
| 460 | 0 | 0 | my @am= grep defined $match->[$_], 0..$#$match; | |||
| 461 | 0 0 | 0 0 | my @bm= @{$match}[@am]; | |||
| 462 | 0 | 0 | return \@am, \@bm; | |||
| 463 | } | |||||
| 464 | ||||||
| 465 | sub compact_diff | |||||
| 466 | { | |||||
| 467 | 0 | 0 | my $a= shift @_; | |||
| 468 | 0 | 0 | my $b= shift @_; | |||
| 469 | 0 | 0 | my( $am, $bm )= LCSidx( $a, $b, @_ ); | |||
| 470 | 0 | 0 | my @cdiff; | |||
| 471 | 0 | 0 | my( $ai, $bi )= ( 0, 0 ); | |||
| 472 | 0 | 0 | push @cdiff, $ai, $bi; | |||
| 473 | 0 | 0 | while( 1 ) { | |||
| 474 | 0 | 0 | while( @$am && $ai == $am->[0] && $bi == $bm->[0] ) { | |||
| 475 | 0 | 0 | shift @$am; | |||
| 476 | 0 | 0 | shift @$bm; | |||
| 477 | 0 | 0 | ++$ai, ++$bi; | |||
| 478 | } | |||||
| 479 | 0 | 0 | push @cdiff, $ai, $bi; | |||
| 480 | 0 | 0 | last if ! @$am; | |||
| 481 | 0 | 0 | $ai = $am->[0]; | |||
| 482 | 0 | 0 | $bi = $bm->[0]; | |||
| 483 | 0 | 0 | push @cdiff, $ai, $bi; | |||
| 484 | } | |||||
| 485 | 0 | 0 | push @cdiff, 0+@$a, 0+@$b | |||
| 486 | if $ai < @$a || $bi < @$b; | |||||
| 487 | 0 | 0 | return wantarray ? @cdiff : \@cdiff; | |||
| 488 | } | |||||
| 489 | ||||||
| 490 | sub diff | |||||
| 491 | { | |||||
| 492 | 0 | 0 | my $a = shift; # array ref | |||
| 493 | 0 | 0 | my $b = shift; # array ref | |||
| 494 | 0 | 0 | my $retval = []; | |||
| 495 | 0 | 0 | my $hunk = []; | |||
| 496 | my $discard = sub { | |||||
| 497 | 0 | 0 | push @$hunk, [ '-', $_[0], $a->[ $_[0] ] ]; | |||
| 498 | 0 | 0 | }; | |||
| 499 | my $add = sub { | |||||
| 500 | 0 | 0 | push @$hunk, [ '+', $_[1], $b->[ $_[1] ] ]; | |||
| 501 | 0 | 0 | }; | |||
| 502 | my $match = sub { | |||||
| 503 | 0 | 0 | push @$retval, $hunk | |||
| 504 | if 0 < @$hunk; | |||||
| 505 | 0 | 0 | $hunk = [] | |||
| 506 | 0 | 0 | }; | |||
| 507 | 0 | 0 | traverse_sequences( $a, $b, | |||
| 508 | { MATCH => $match, DISCARD_A => $discard, DISCARD_B => $add }, @_ ); | |||||
| 509 | 0 | 0 | &$match(); | |||
| 510 | 0 | 0 | return wantarray ? @$retval : $retval; | |||
| 511 | } | |||||
| 512 | ||||||
| 513 | sub sdiff | |||||
| 514 | { | |||||
| 515 | 0 | 0 | my $a = shift; # array ref | |||
| 516 | 0 | 0 | my $b = shift; # array ref | |||
| 517 | 0 | 0 | my $retval = []; | |||
| 518 | 0 0 | 0 0 | my $discard = sub { push ( @$retval, [ '-', $a->[ $_[0] ], "" ] ) }; | |||
| 519 | 0 0 | 0 0 | my $add = sub { push ( @$retval, [ '+', "", $b->[ $_[1] ] ] ) }; | |||
| 520 | my $change = sub { | |||||
| 521 | 0 | 0 | push ( @$retval, [ 'c', $a->[ $_[0] ], $b->[ $_[1] ] ] ); | |||
| 522 | 0 | 0 | }; | |||
| 523 | my $match = sub { | |||||
| 524 | 0 | 0 | push ( @$retval, [ 'u', $a->[ $_[0] ], $b->[ $_[1] ] ] ); | |||
| 525 | 0 | 0 | }; | |||
| 526 | 0 | 0 | traverse_balanced( | |||
| 527 | $a, | |||||
| 528 | $b, | |||||
| 529 | { | |||||
| 530 | MATCH => $match, | |||||
| 531 | DISCARD_A => $discard, | |||||
| 532 | DISCARD_B => $add, | |||||
| 533 | CHANGE => $change, | |||||
| 534 | }, | |||||
| 535 | @_ | |||||
| 536 | ); | |||||
| 537 | 0 | 0 | return wantarray ? @$retval : $retval; | |||
| 538 | } | |||||
| 539 | ||||||
| 540 | ######################################## | |||||
| 541 | my $Root= __PACKAGE__; | |||||
| 542 | package Algorithm::Diff::_impl; | |||||
| 543 | 6 6 6 | 17 3 3181 | use strict; | |||
| 544 | ||||||
| 545 | sub _Idx() { 0 } # $me->[_Idx]: Ref to array of hunk indices | |||||
| 546 | # 1 # $me->[1]: Ref to first sequence | |||||
| 547 | # 2 # $me->[2]: Ref to second sequence | |||||
| 548 | sub _End() { 3 } # $me->[_End]: Diff between forward and reverse pos | |||||
| 549 | sub _Same() { 4 } # $me->[_Same]: 1 if pos 1 contains unchanged items | |||||
| 550 | sub _Base() { 5 } # $me->[_Base]: Added to range's min and max | |||||
| 551 | sub _Pos() { 6 } # $me->[_Pos]: Which hunk is currently selected | |||||
| 552 | sub _Off() { 7 } # $me->[_Off]: Offset into _Idx for current position | |||||
| 553 | sub _Min() { -2 } # Added to _Off to get min instead of max+1 | |||||
| 554 | ||||||
| 555 | sub Die | |||||
| 556 | { | |||||
| 557 | 0 | 0 | require Carp; | |||
| 558 | 0 | 0 | Carp::confess( @_ ); | |||
| 559 | } | |||||
| 560 | ||||||
| 561 | sub _ChkPos | |||||
| 562 | { | |||||
| 563 | 0 | 0 | my( $me )= @_; | |||
| 564 | 0 | 0 | return if $me->[_Pos]; | |||
| 565 | 0 | 0 | my $meth= ( caller(1) )[3]; | |||
| 566 | 0 | 0 | Die( "Called $meth on 'reset' object" ); | |||
| 567 | } | |||||
| 568 | ||||||
| 569 | sub _ChkSeq | |||||
| 570 | { | |||||
| 571 | 0 | 0 | my( $me, $seq )= @_; | |||
| 572 | 0 | 0 | return $seq + $me->[_Off] | |||
| 573 | if 1 == $seq || 2 == $seq; | |||||
| 574 | 0 | 0 | my $meth= ( caller(1) )[3]; | |||
| 575 | 0 | 0 | Die( "$meth: Invalid sequence number ($seq); must be 1 or 2" ); | |||
| 576 | } | |||||
| 577 | ||||||
| 578 | sub getObjPkg | |||||
| 579 | { | |||||
| 580 | 6 | 4 | my( $us )= @_; | |||
| 581 | 6 | 13 | return ref $us if ref $us; | |||
| 582 | 6 | 8 | return $us . "::_obj"; | |||
| 583 | } | |||||
| 584 | ||||||
| 585 | sub new | |||||
| 586 | { | |||||
| 587 | 0 | my( $us, $seq1, $seq2, $opts ) = @_; | ||||
| 588 | 0 | my @args; | ||||
| 589 | 0 | for( $opts->{keyGen} ) { | ||||
| 590 | 0 | push @args, $_ if $_; | ||||
| 591 | } | |||||
| 592 | 0 | for( $opts->{keyGenArgs} ) { | ||||
| 593 | 0 | push @args, @$_ if $_; | ||||
| 594 | } | |||||
| 595 | 0 | my $cdif= Algorithm::Diff::compact_diff( $seq1, $seq2, @args ); | ||||
| 596 | 0 | my $same= 1; | ||||
| 597 | 0 | if( 0 == $cdif->[2] && 0 == $cdif->[3] ) { | ||||
| 598 | 0 | $same= 0; | ||||
| 599 | 0 | splice @$cdif, 0, 2; | ||||
| 600 | } | |||||
| 601 | 0 | my @obj= ( $cdif, $seq1, $seq2 ); | ||||
| 602 | 0 | $obj[_End] = (1+@$cdif)/2; | ||||
| 603 | 0 | $obj[_Same] = $same; | ||||
| 604 | 0 | $obj[_Base] = 0; | ||||
| 605 | 0 | my $me = bless \@obj, $us->getObjPkg(); | ||||
| 606 | 0 | $me->Reset( 0 ); | ||||
| 607 | 0 | return $me; | ||||
| 608 | } | |||||
| 609 | ||||||
| 610 | sub Reset | |||||
| 611 | { | |||||
| 612 | 0 | my( $me, $pos )= @_; | ||||
| 613 | 0 | $pos= int( $pos || 0 ); | ||||
| 614 | 0 | $pos += $me->[_End] | ||||
| 615 | if $pos < 0; | |||||
| 616 | 0 | $pos= 0 | ||||
| 617 | if $pos < 0 || $me->[_End] <= $pos; | |||||
| 618 | 0 | $me->[_Pos]= $pos || !1; | ||||
| 619 | 0 | $me->[_Off]= 2*$pos - 1; | ||||
| 620 | 0 | return $me; | ||||
| 621 | } | |||||
| 622 | ||||||
| 623 | sub Base | |||||
| 624 | { | |||||
| 625 | 0 | my( $me, $base )= @_; | ||||
| 626 | 0 | my $oldBase= $me->[_Base]; | ||||
| 627 | 0 | $me->[_Base]= 0+$base if defined $base; | ||||
| 628 | 0 | return $oldBase; | ||||
| 629 | } | |||||
| 630 | ||||||
| 631 | sub Copy | |||||
| 632 | { | |||||
| 633 | 0 | my( $me, $pos, $base )= @_; | ||||
| 634 | 0 | my @obj= @$me; | ||||
| 635 | 0 | my $you= bless \@obj, ref($me); | ||||
| 636 | 0 | $you->Reset( $pos ) if defined $pos; | ||||
| 637 | 0 | $you->Base( $base ); | ||||
| 638 | 0 | return $you; | ||||
| 639 | } | |||||
| 640 | ||||||
| 641 | sub Next { | |||||
| 642 | 0 | my( $me, $steps )= @_; | ||||
| 643 | 0 | $steps= 1 if ! defined $steps; | ||||
| 644 | 0 | if( $steps ) { | ||||
| 645 | 0 | my $pos= $me->[_Pos]; | ||||
| 646 | 0 | my $new= $pos + $steps; | ||||
| 647 | 0 | $new= 0 if $pos && $new < 0; | ||||
| 648 | 0 | $me->Reset( $new ) | ||||
| 649 | } | |||||
| 650 | 0 | return $me->[_Pos]; | ||||
| 651 | } | |||||
| 652 | ||||||
| 653 | sub Prev { | |||||
| 654 | 0 | my( $me, $steps )= @_; | ||||
| 655 | 0 | $steps= 1 if ! defined $steps; | ||||
| 656 | 0 | my $pos= $me->Next(-$steps); | ||||
| 657 | 0 | $pos -= $me->[_End] if $pos; | ||||
| 658 | 0 | return $pos; | ||||
| 659 | } | |||||
| 660 | ||||||
| 661 | sub Diff { | |||||
| 662 | 0 | my( $me )= @_; | ||||
| 663 | 0 | $me->_ChkPos(); | ||||
| 664 | 0 | return 0 if $me->[_Same] == ( 1 & $me->[_Pos] ); | ||||
| 665 | 0 | my $ret= 0; | ||||
| 666 | 0 | my $off= $me->[_Off]; | ||||
| 667 | 0 | for my $seq ( 1, 2 ) { | ||||
| 668 | 0 | $ret |= $seq | ||||
| 669 | if $me->[_Idx][ $off + $seq + _Min ] | |||||
| 670 | < $me->[_Idx][ $off + $seq ]; | |||||
| 671 | } | |||||
| 672 | 0 | return $ret; | ||||
| 673 | } | |||||
| 674 | ||||||
| 675 | sub Min { | |||||
| 676 | 0 | my( $me, $seq, $base )= @_; | ||||
| 677 | 0 | $me->_ChkPos(); | ||||
| 678 | 0 | my $off= $me->_ChkSeq($seq); | ||||
| 679 | 0 | $base= $me->[_Base] if !defined $base; | ||||
| 680 | 0 | return $base + $me->[_Idx][ $off + _Min ]; | ||||
| 681 | } | |||||
| 682 | ||||||
| 683 | sub Max { | |||||
| 684 | 0 | my( $me, $seq, $base )= @_; | ||||
| 685 | 0 | $me->_ChkPos(); | ||||
| 686 | 0 | my $off= $me->_ChkSeq($seq); | ||||
| 687 | 0 | $base= $me->[_Base] if !defined $base; | ||||
| 688 | 0 | return $base + $me->[_Idx][ $off ] -1; | ||||
| 689 | } | |||||
| 690 | ||||||
| 691 | sub Range { | |||||
| 692 | 0 | my( $me, $seq, $base )= @_; | ||||
| 693 | 0 | $me->_ChkPos(); | ||||
| 694 | 0 | my $off = $me->_ChkSeq($seq); | ||||
| 695 | 0 | if( !wantarray ) { | ||||
| 696 | 0 | return $me->[_Idx][ $off ] | ||||
| 697 | - $me->[_Idx][ $off + _Min ]; | |||||
| 698 | } | |||||
| 699 | 0 | $base= $me->[_Base] if !defined $base; | ||||
| 700 | 0 | return ( $base + $me->[_Idx][ $off + _Min ] ) | ||||
| 701 | .. ( $base + $me->[_Idx][ $off ] - 1 ); | |||||
| 702 | } | |||||
| 703 | ||||||
| 704 | sub Items { | |||||
| 705 | 0 | my( $me, $seq )= @_; | ||||
| 706 | 0 | $me->_ChkPos(); | ||||
| 707 | 0 | my $off = $me->_ChkSeq($seq); | ||||
| 708 | 0 | if( !wantarray ) { | ||||
| 709 | 0 | return $me->[_Idx][ $off ] | ||||
| 710 | - $me->[_Idx][ $off + _Min ]; | |||||
| 711 | } | |||||
| 712 | return | |||||
| 713 | 0 0 | @{$me->[$seq]}[ | ||||
| 714 | $me->[_Idx][ $off + _Min ] | |||||
| 715 | .. ( $me->[_Idx][ $off ] - 1 ) | |||||
| 716 | ]; | |||||
| 717 | } | |||||
| 718 | ||||||
| 719 | sub Same { | |||||
| 720 | 0 | my( $me )= @_; | ||||
| 721 | 0 | $me->_ChkPos(); | ||||
| 722 | 0 | return wantarray ? () : 0 | ||||
| 723 | if $me->[_Same] != ( 1 & $me->[_Pos] ); | |||||
| 724 | 0 | return $me->Items(1); | ||||
| 725 | } | |||||
| 726 | ||||||
| 727 | my %getName; | |||||
| 728 | BEGIN { | |||||
| 729 | 6 | 759 | %getName= ( | |||
| 730 | same => \&Same, | |||||
| 731 | diff => \&Diff, | |||||
| 732 | base => \&Base, | |||||
| 733 | min => \&Min, | |||||
| 734 | max => \&Max, | |||||
| 735 | range=> \&Range, | |||||
| 736 | items=> \&Items, # same thing | |||||
| 737 | ); | |||||
| 738 | } | |||||
| 739 | ||||||
| 740 | sub Get | |||||
| 741 | { | |||||
| 742 | 0 | my $me= shift @_; | ||||
| 743 | 0 | $me->_ChkPos(); | ||||
| 744 | 0 | my @value; | ||||
| 745 | 0 | for my $arg ( @_ ) { | ||||
| 746 | 0 | for my $word ( split ' ', $arg ) { | ||||
| 747 | 0 | my $meth; | ||||
| 748 | 0 | if( $word !~ /^(-?\d+)?([a-zA-Z]+)([12])?$/ | ||||
| 749 | || not $meth= $getName{ lc $2 } | |||||
| 750 | ) { | |||||
| 751 | 0 | Die( $Root, ", Get: Invalid request ($word)" ); | ||||
| 752 | } | |||||
| 753 | 0 | my( $base, $name, $seq )= ( $1, $2, $3 ); | ||||
| 754 | 0 | push @value, scalar( | ||||
| 755 | 4 == length($name) | |||||
| 756 | ? $meth->( $me ) | |||||
| 757 | : $meth->( $me, $seq, $base ) | |||||
| 758 | ); | |||||
| 759 | } | |||||
| 760 | } | |||||
| 761 | 0 | if( wantarray ) { | ||||
| 762 | 0 | return @value; | ||||
| 763 | } elsif( 1 == @value ) { | |||||
| 764 | 0 | return $value[0]; | ||||
| 765 | } | |||||
| 766 | 0 | Die( 0+@value, " values requested from ", | ||||
| 767 | $Root, "'s Get in scalar context" ); | |||||
| 768 | } | |||||
| 769 | ||||||
| 770 | ||||||
| 771 | my $Obj= getObjPkg($Root); | |||||
| 772 | 6 6 6 | 21 6 300 | no strict 'refs'; | |||
| 773 | ||||||
| 774 | for my $meth ( qw( new getObjPkg ) ) { | |||||
| 775 | *{$Root."::".$meth} = \&{$meth}; | |||||
| 776 | *{$Obj ."::".$meth} = \&{$meth}; | |||||
| 777 | } | |||||
| 778 | for my $meth ( qw( | |||||
| 779 | Next Prev Reset Copy Base Diff | |||||
| 780 | Same Items Range Min Max Get | |||||
| 781 | _ChkPos _ChkSeq | |||||
| 782 | ) ) { | |||||
| 783 | *{$Obj."::".$meth} = \&{$meth}; | |||||
| 784 | } | |||||
| 785 | ||||||
| 786 | 1; | |||||