File: | local/lib/perl5/Text/Diff.pm |
Coverage: | 11.7% |
line | stmt | bran | cond | sub | time | code |
---|---|---|---|---|---|---|
1 | package Text::Diff; | |||||
2 | ||||||
3 | 6 6 | 48 10 | use 5.00503; | |||
4 | 6 6 6 | 8 4 58 | use strict; | |||
5 | 6 6 6 | 10 4 490 | use Carp; | |||
6 | 6 6 6 | 10 4 30 | use Exporter (); | |||
7 | 6 6 6 | 1259 7 63 | use Algorithm::Diff (); | |||
8 | 6 6 6 | 12 3 198 | use vars qw{$VERSION @ISA @EXPORT}; | |||
9 | BEGIN { | |||||
10 | 6 | 5 | $VERSION = '1.41'; | |||
11 | 6 | 26 | @ISA = 'Exporter'; | |||
12 | 6 | 49 | @EXPORT = 'diff'; | |||
13 | }; | |||||
14 | ||||||
15 | ## Hunks are made of ops. An op is the starting index for each | |||||
16 | ## sequence and the opcode: | |||||
17 | 6 6 6 | 12 4 191 | use constant A => 0; # Array index before match/discard | |||
18 | 6 6 6 | 11 4 88 | use constant B => 1; | |||
19 | 6 6 6 | 10 3 90 | use constant OPCODE => 2; # "-", " ", "+" | |||
20 | 6 6 6 | 11 2 6158 | use constant FLAG => 3; # What to display if not OPCODE "!" | |||
21 | ||||||
22 | my %internal_styles = ( | |||||
23 | Unified => undef, | |||||
24 | Context => undef, | |||||
25 | OldStyle => undef, | |||||
26 | Table => undef, ## "internal", but in another module | |||||
27 | ); | |||||
28 | ||||||
29 | sub diff { | |||||
30 | 0 | my @seqs = ( shift, shift ); | ||||
31 | 0 | my $options = shift || {}; | ||||
32 | ||||||
33 | 0 | for my $i ( 0 .. 1 ) { | ||||
34 | 0 | my $seq = $seqs[$i]; | ||||
35 | 0 | my $type = ref $seq; | ||||
36 | ||||||
37 | 0 | while ( $type eq "CODE" ) { | ||||
38 | 0 | $seqs[$i] = $seq = $seq->( $options ); | ||||
39 | 0 | $type = ref $seq; | ||||
40 | } | |||||
41 | ||||||
42 | 0 | my $AorB = !$i ? "A" : "B"; | ||||
43 | ||||||
44 | 0 | if ( $type eq "ARRAY" ) { | ||||
45 | ## This is most efficient :) | |||||
46 | 0 | $options->{"OFFSET_$AorB"} = 0 | ||||
47 | unless defined $options->{"OFFSET_$AorB"}; | |||||
48 | } | |||||
49 | elsif ( $type eq "SCALAR" ) { | |||||
50 | 0 | $seqs[$i] = [split( /^/m, $$seq )]; | ||||
51 | 0 | $options->{"OFFSET_$AorB"} = 1 | ||||
52 | unless defined $options->{"OFFSET_$AorB"}; | |||||
53 | } | |||||
54 | elsif ( ! $type ) { | |||||
55 | 0 | $options->{"OFFSET_$AorB"} = 1 | ||||
56 | unless defined $options->{"OFFSET_$AorB"}; | |||||
57 | 0 | $options->{"FILENAME_$AorB"} = $seq | ||||
58 | unless defined $options->{"FILENAME_$AorB"}; | |||||
59 | 0 | $options->{"MTIME_$AorB"} = (stat($seq))[9] | ||||
60 | unless defined $options->{"MTIME_$AorB"}; | |||||
61 | ||||||
62 | 0 | local $/ = "\n"; | ||||
63 | 0 | open F, "<$seq" or carp "$!: $seq"; | ||||
64 | 0 | $seqs[$i] = [<F>]; | ||||
65 | 0 | close F; | ||||
66 | ||||||
67 | } | |||||
68 | elsif ( $type eq "GLOB" || UNIVERSAL::isa( $seq, "IO::Handle" ) ) { | |||||
69 | 0 | $options->{"OFFSET_$AorB"} = 1 | ||||
70 | unless defined $options->{"OFFSET_$AorB"}; | |||||
71 | 0 | local $/ = "\n"; | ||||
72 | 0 | $seqs[$i] = [<$seq>]; | ||||
73 | } | |||||
74 | else { | |||||
75 | 0 | confess "Can't handle input of type ", ref; | ||||
76 | } | |||||
77 | } | |||||
78 | ||||||
79 | ## Config vars | |||||
80 | 0 | my $output; | ||||
81 | 0 | my $output_handler = $options->{OUTPUT}; | ||||
82 | 0 | my $type = ref $output_handler ; | ||||
83 | 0 | if ( ! defined $output_handler ) { | ||||
84 | 0 | $output = ""; | ||||
85 | 0 0 | $output_handler = sub { $output .= shift }; | ||||
86 | } | |||||
87 | elsif ( $type eq "CODE" ) { | |||||
88 | ## No problems, mate. | |||||
89 | } | |||||
90 | elsif ( $type eq "SCALAR" ) { | |||||
91 | 0 | my $out_ref = $output_handler; | ||||
92 | 0 0 | $output_handler = sub { $$out_ref .= shift }; | ||||
93 | } | |||||
94 | elsif ( $type eq "ARRAY" ) { | |||||
95 | 0 | my $out_ref = $output_handler; | ||||
96 | 0 0 | $output_handler = sub { push @$out_ref, shift }; | ||||
97 | } | |||||
98 | elsif ( $type eq "GLOB" || UNIVERSAL::isa $output_handler, "IO::Handle" ) { | |||||
99 | 0 | my $output_handle = $output_handler; | ||||
100 | 0 0 | $output_handler = sub { print $output_handle shift }; | ||||
101 | } | |||||
102 | else { | |||||
103 | 0 | croak "Unrecognized output type: $type"; | ||||
104 | } | |||||
105 | ||||||
106 | 0 | my $style = $options->{STYLE}; | ||||
107 | 0 | $style = "Unified" unless defined $options->{STYLE}; | ||||
108 | 0 | $style = "Text::Diff::$style" if exists $internal_styles{$style}; | ||||
109 | ||||||
110 | 0 | if ( ! $style->can( "hunk" ) ) { | ||||
111 | 0 | eval "require $style; 1" or die $@; | ||||
112 | } | |||||
113 | ||||||
114 | 0 | $style = $style->new | ||||
115 | if ! ref $style && $style->can( "new" ); | |||||
116 | ||||||
117 | 0 | my $ctx_lines = $options->{CONTEXT}; | ||||
118 | 0 | $ctx_lines = 3 unless defined $ctx_lines; | ||||
119 | 0 | $ctx_lines = 0 if $style->isa( "Text::Diff::OldStyle" ); | ||||
120 | ||||||
121 | 0 | my @keygen_args = $options->{KEYGEN_ARGS} | ||||
122 | 0 | ? @{$options->{KEYGEN_ARGS}} | ||||
123 | : (); | |||||
124 | ||||||
125 | ## State vars | |||||
126 | 0 | my $diffs = 0; ## Number of discards this hunk | ||||
127 | 0 | my $ctx = 0; ## Number of " " (ctx_lines) ops pushed after last diff. | ||||
128 | 0 | my @ops; ## ops (" ", +, -) in this hunk | ||||
129 | 0 | my $hunks = 0; ## Number of hunks | ||||
130 | ||||||
131 | my $emit_ops = sub { | |||||
132 | 0 | $output_handler->( $style->file_header( @seqs, $options ) ) | ||||
133 | unless $hunks++; | |||||
134 | 0 | $output_handler->( $style->hunk_header( @seqs, @_, $options ) ); | ||||
135 | 0 | $output_handler->( $style->hunk ( @seqs, @_, $options ) ); | ||||
136 | 0 | $output_handler->( $style->hunk_footer( @seqs, @_, $options ) ); | ||||
137 | 0 | }; | ||||
138 | ||||||
139 | ## We keep 2*ctx_lines so that if a diff occurs | |||||
140 | ## at 2*ctx_lines we continue to grow the hunk instead | |||||
141 | ## of emitting diffs and context as we go. We | |||||
142 | ## need to know the total length of both of the two | |||||
143 | ## subsequences so the line count can be printed in the | |||||
144 | ## header. | |||||
145 | 0 0 0 0 | my $dis_a = sub {push @ops, [@_[0,1],"-"]; ++$diffs ; $ctx = 0 }; | ||||
146 | 0 0 0 0 | my $dis_b = sub {push @ops, [@_[0,1],"+"]; ++$diffs ; $ctx = 0 }; | ||||
147 | ||||||
148 | Algorithm::Diff::traverse_sequences( | |||||
149 | @seqs, | |||||
150 | { | |||||
151 | MATCH => sub { | |||||
152 | 0 | push @ops, [@_[0,1]," "]; | ||||
153 | ||||||
154 | 0 | if ( $diffs && ++$ctx > $ctx_lines * 2 ) { | ||||
155 | 0 | $emit_ops->( [ splice @ops, 0, $#ops - $ctx_lines ] ); | ||||
156 | 0 | $ctx = $diffs = 0; | ||||
157 | } | |||||
158 | ||||||
159 | ## throw away context lines that aren't needed any more | |||||
160 | 0 | shift @ops if ! $diffs && @ops > $ctx_lines; | ||||
161 | }, | |||||
162 | 0 | DISCARD_A => $dis_a, | ||||
163 | DISCARD_B => $dis_b, | |||||
164 | }, | |||||
165 | $options->{KEYGEN}, # pass in user arguments for key gen function | |||||
166 | @keygen_args, | |||||
167 | ); | |||||
168 | ||||||
169 | 0 | if ( $diffs ) { | ||||
170 | 0 | $#ops -= $ctx - $ctx_lines if $ctx > $ctx_lines; | ||||
171 | 0 | $emit_ops->( \@ops ); | ||||
172 | } | |||||
173 | ||||||
174 | 0 | $output_handler->( $style->file_footer( @seqs, $options ) ) if $hunks; | ||||
175 | ||||||
176 | 0 | return defined $output ? $output : $hunks; | ||||
177 | } | |||||
178 | ||||||
179 | sub _header { | |||||
180 | 0 | my ( $h ) = @_; | ||||
181 | 0 | my ( $p1, $fn1, $t1, $p2, $fn2, $t2 ) = @{$h}{ | ||||
182 | 0 | "FILENAME_PREFIX_A", | ||||
183 | "FILENAME_A", | |||||
184 | "MTIME_A", | |||||
185 | "FILENAME_PREFIX_B", | |||||
186 | "FILENAME_B", | |||||
187 | "MTIME_B" | |||||
188 | }; | |||||
189 | ||||||
190 | ## remember to change Text::Diff::Table if this logic is tweaked. | |||||
191 | 0 | return "" unless defined $fn1 && defined $fn2; | ||||
192 | ||||||
193 | 0 | return join( "", | ||||
194 | $p1, " ", $fn1, defined $t1 ? "\t" . localtime $t1 : (), "\n", | |||||
195 | $p2, " ", $fn2, defined $t2 ? "\t" . localtime $t2 : (), "\n", | |||||
196 | ); | |||||
197 | } | |||||
198 | ||||||
199 | ## _range encapsulates the building of, well, ranges. Turns out there are | |||||
200 | ## a few nuances. | |||||
201 | sub _range { | |||||
202 | 0 | my ( $ops, $a_or_b, $format ) = @_; | ||||
203 | ||||||
204 | 0 | my $start = $ops->[ 0]->[$a_or_b]; | ||||
205 | 0 | my $after = $ops->[-1]->[$a_or_b]; | ||||
206 | ||||||
207 | ## The sequence indexes in the lines are from *before* the OPCODE is | |||||
208 | ## executed, so we bump the last index up unless the OP indicates | |||||
209 | ## it didn't change. | |||||
210 | 0 | ++$after | ||||
211 | unless $ops->[-1]->[OPCODE] eq ( $a_or_b == A ? "+" : "-" ); | |||||
212 | ||||||
213 | ## convert from 0..n index to 1..(n+1) line number. The unless modifier | |||||
214 | ## handles diffs with no context, where only one file is affected. In this | |||||
215 | ## case $start == $after indicates an empty range, and the $start must | |||||
216 | ## not be incremented. | |||||
217 | 0 | my $empty_range = $start == $after; | ||||
218 | 0 | ++$start unless $empty_range; | ||||
219 | ||||||
220 | return | |||||
221 | 0 | $start == $after | ||||
222 | ? $format eq "unified" && $empty_range | |||||
223 | ? "$start,0" | |||||
224 | : $start | |||||
225 | : $format eq "unified" | |||||
226 | ? "$start,".($after-$start+1) | |||||
227 | : "$start,$after"; | |||||
228 | } | |||||
229 | ||||||
230 | sub _op_to_line { | |||||
231 | 0 | my ( $seqs, $op, $a_or_b, $op_prefixes ) = @_; | ||||
232 | ||||||
233 | 0 | my $opcode = $op->[OPCODE]; | ||||
234 | 0 | return () unless defined $op_prefixes->{$opcode}; | ||||
235 | ||||||
236 | 0 | my $op_sym = defined $op->[FLAG] ? $op->[FLAG] : $opcode; | ||||
237 | 0 | $op_sym = $op_prefixes->{$op_sym}; | ||||
238 | 0 | return () unless defined $op_sym; | ||||
239 | ||||||
240 | 0 | $a_or_b = $op->[OPCODE] ne "+" ? 0 : 1 unless defined $a_or_b; | ||||
241 | 0 | return ( $op_sym, $seqs->[$a_or_b][$op->[$a_or_b]] ); | ||||
242 | } | |||||
243 | ||||||
244 | SCOPE: { | |||||
245 | package Text::Diff::Base; | |||||
246 | ||||||
247 | sub new { | |||||
248 | 0 | my $proto = shift; | ||||
249 | 0 | return bless { @_ }, ref $proto || $proto; | ||||
250 | } | |||||
251 | ||||||
252 | 0 | sub file_header { return "" } | ||||
253 | ||||||
254 | 0 | sub hunk_header { return "" } | ||||
255 | ||||||
256 | 0 | sub hunk { return "" } | ||||
257 | ||||||
258 | 0 | sub hunk_footer { return "" } | ||||
259 | ||||||
260 | 0 | sub file_footer { return "" } | ||||
261 | } | |||||
262 | ||||||
263 | @Text::Diff::Unified::ISA = qw( Text::Diff::Base ); | |||||
264 | ||||||
265 | sub Text::Diff::Unified::file_header { | |||||
266 | 0 | shift; ## No instance data | ||||
267 | 0 | my $options = pop ; | ||||
268 | ||||||
269 | 0 | _header( | ||||
270 | { FILENAME_PREFIX_A => "---", FILENAME_PREFIX_B => "+++", %$options } | |||||
271 | ); | |||||
272 | } | |||||
273 | ||||||
274 | sub Text::Diff::Unified::hunk_header { | |||||
275 | 0 | shift; ## No instance data | ||||
276 | 0 | pop; ## Ignore options | ||||
277 | 0 | my $ops = pop; | ||||
278 | ||||||
279 | 0 | return join( "", | ||||
280 | "@@ -", | |||||
281 | _range( $ops, A, "unified" ), | |||||
282 | " +", | |||||
283 | _range( $ops, B, "unified" ), | |||||
284 | " @@\n", | |||||
285 | ); | |||||
286 | } | |||||
287 | ||||||
288 | sub Text::Diff::Unified::hunk { | |||||
289 | 0 | shift; ## No instance data | ||||
290 | 0 | pop; ## Ignore options | ||||
291 | 0 | my $ops = pop; | ||||
292 | ||||||
293 | 0 | my $prefixes = { "+" => "+", " " => " ", "-" => "-" }; | ||||
294 | ||||||
295 | 0 | return join "", map _op_to_line( \@_, $_, undef, $prefixes ), @$ops | ||||
296 | } | |||||
297 | ||||||
298 | @Text::Diff::Context::ISA = qw( Text::Diff::Base ); | |||||
299 | ||||||
300 | sub Text::Diff::Context::file_header { | |||||
301 | 0 0 | _header { FILENAME_PREFIX_A=>"***", FILENAME_PREFIX_B=>"---", %{$_[-1]} }; | ||||
302 | } | |||||
303 | ||||||
304 | sub Text::Diff::Context::hunk_header { | |||||
305 | 0 | return "***************\n"; | ||||
306 | } | |||||
307 | ||||||
308 | sub Text::Diff::Context::hunk { | |||||
309 | 0 | shift; ## No instance data | ||||
310 | 0 | pop; ## Ignore options | ||||
311 | 0 | my $ops = pop; | ||||
312 | ## Leave the sequences in @_[0,1] | |||||
313 | ||||||
314 | 0 | my $a_range = _range( $ops, A, "" ); | ||||
315 | 0 | my $b_range = _range( $ops, B, "" ); | ||||
316 | ||||||
317 | ## Sigh. Gotta make sure that differences that aren't adds/deletions | |||||
318 | ## get prefixed with "!", and that the old opcodes are removed. | |||||
319 | 0 | my $after; | ||||
320 | 0 | for ( my $start = 0; $start <= $#$ops ; $start = $after ) { | ||||
321 | ## Scan until next difference | |||||
322 | 0 | $after = $start + 1; | ||||
323 | 0 | my $opcode = $ops->[$start]->[OPCODE]; | ||||
324 | 0 | next if $opcode eq " "; | ||||
325 | ||||||
326 | 0 | my $bang_it; | ||||
327 | 0 | while ( $after <= $#$ops && $ops->[$after]->[OPCODE] ne " " ) { | ||||
328 | 0 | $bang_it ||= $ops->[$after]->[OPCODE] ne $opcode; | ||||
329 | 0 | ++$after; | ||||
330 | } | |||||
331 | ||||||
332 | 0 | if ( $bang_it ) { | ||||
333 | 0 | for my $i ( $start..($after-1) ) { | ||||
334 | 0 | $ops->[$i]->[FLAG] = "!"; | ||||
335 | } | |||||
336 | } | |||||
337 | } | |||||
338 | ||||||
339 | 0 | my $b_prefixes = { "+" => "+ ", " " => " ", "-" => undef, "!" => "! " }; | ||||
340 | 0 | my $a_prefixes = { "+" => undef, " " => " ", "-" => "- ", "!" => "! " }; | ||||
341 | ||||||
342 | 0 | return join( "", | ||||
343 | "*** ", $a_range, " ****\n", | |||||
344 | map( _op_to_line( \@_, $_, A, $a_prefixes ), @$ops ), | |||||
345 | "--- ", $b_range, " ----\n", | |||||
346 | map( _op_to_line( \@_, $_, B, $b_prefixes ), @$ops ), | |||||
347 | ); | |||||
348 | } | |||||
349 | ||||||
350 | @Text::Diff::OldStyle::ISA = qw( Text::Diff::Base ); | |||||
351 | ||||||
352 | sub _op { | |||||
353 | 0 | my $ops = shift; | ||||
354 | 0 | my $op = $ops->[0]->[OPCODE]; | ||||
355 | 0 | $op = "c" if grep $_->[OPCODE] ne $op, @$ops; | ||||
356 | 0 | $op = "a" if $op eq "+"; | ||||
357 | 0 | $op = "d" if $op eq "-"; | ||||
358 | 0 | return $op; | ||||
359 | } | |||||
360 | ||||||
361 | sub Text::Diff::OldStyle::hunk_header { | |||||
362 | 0 | shift; ## No instance data | ||||
363 | 0 | pop; ## ignore options | ||||
364 | 0 | my $ops = pop; | ||||
365 | ||||||
366 | 0 | my $op = _op $ops; | ||||
367 | ||||||
368 | 0 | return join "", _range( $ops, A, "" ), $op, _range( $ops, B, "" ), "\n"; | ||||
369 | } | |||||
370 | ||||||
371 | sub Text::Diff::OldStyle::hunk { | |||||
372 | 0 | shift; ## No instance data | ||||
373 | 0 | pop; ## ignore options | ||||
374 | 0 | my $ops = pop; | ||||
375 | ## Leave the sequences in @_[0,1] | |||||
376 | ||||||
377 | 0 | my $a_prefixes = { "+" => undef, " " => undef, "-" => "< " }; | ||||
378 | 0 | my $b_prefixes = { "+" => "> ", " " => undef, "-" => undef }; | ||||
379 | ||||||
380 | 0 | my $op = _op $ops; | ||||
381 | ||||||
382 | 0 | return join( "", | ||||
383 | map( _op_to_line( \@_, $_, A, $a_prefixes ), @$ops ), | |||||
384 | $op eq "c" ? "---\n" : (), | |||||
385 | map( _op_to_line( \@_, $_, B, $b_prefixes ), @$ops ), | |||||
386 | ); | |||||
387 | } | |||||
388 | ||||||
389 | 1; | |||||
390 |