File Coverage

File:	local/lib/perl5/Text/Diff.pm
Coverage:	11.7%

line	stmt	bran	cond	sub	time	code
1						package Text::Diff;
2
3	6 6			6	48 10	use 5.00503;
4	6 6 6			6	8 4 58	use strict;
5	6 6 6			6	10 4 490	use Carp;
6	6 6 6			6	10 4 30	use Exporter ();
7	6 6 6			6	1259 7 63	use Algorithm::Diff ();
8	6 6 6			6	12 3 198	use vars qw{$VERSION @ISA @EXPORT};
9						BEGIN {
10	6			6	5	$VERSION = '1.41';
11	6				26	@ISA = 'Exporter';
12	6				49	@EXPORT = 'diff';
13						};
14
15						## Hunks are made of ops. An op is the starting index for each
16						## sequence and the opcode:
17	6 6 6			6	12 4 191	use constant A => 0; # Array index before match/discard
18	6 6 6			6	11 4 88	use constant B => 1;
19	6 6 6			6	10 3 90	use constant OPCODE => 2; # "-", " ", "+"
20	6 6 6			6	11 2 6158	use constant FLAG => 3; # What to display if not OPCODE "!"
21
22						my %internal_styles = (
23						Unified => undef,
24						Context => undef,
25						OldStyle => undef,
26						Table => undef, ## "internal", but in another module
27						);
28
29						sub diff {
30	0			0		my @seqs = ( shift, shift );
31	0		0			my $options = shift \|\| {};
32
33	0					for my $i ( 0 .. 1 ) {
34	0					my $seq = $seqs[$i];
35	0					my $type = ref $seq;
36
37	0					while ( $type eq "CODE" ) {
38	0					$seqs[$i] = $seq = $seq->( $options );
39	0					$type = ref $seq;
40						}
41
42	0	0				my $AorB = !$i ? "A" : "B";
43
44	0	0 0 0 0	0			if ( $type eq "ARRAY" ) {
45						## This is most efficient :)
46	0	0				$options->{"OFFSET_$AorB"} = 0
47						unless defined $options->{"OFFSET_$AorB"};
48						}
49						elsif ( $type eq "SCALAR" ) {
50	0					$seqs[$i] = [split( /^/m, $$seq )];
51	0	0				$options->{"OFFSET_$AorB"} = 1
52						unless defined $options->{"OFFSET_$AorB"};
53						}
54						elsif ( ! $type ) {
55	0	0				$options->{"OFFSET_$AorB"} = 1
56						unless defined $options->{"OFFSET_$AorB"};
57	0	0				$options->{"FILENAME_$AorB"} = $seq
58						unless defined $options->{"FILENAME_$AorB"};
59	0	0				$options->{"MTIME_$AorB"} = (stat($seq))[9]
60						unless defined $options->{"MTIME_$AorB"};
61
62	0					local $/ = "\n";
63	0	0				open F, "<$seq" or carp "$!: $seq";
64	0					$seqs[$i] = [<F>];
65	0					close F;
66
67						}
68						elsif ( $type eq "GLOB" \|\| UNIVERSAL::isa( $seq, "IO::Handle" ) ) {
69	0	0				$options->{"OFFSET_$AorB"} = 1
70						unless defined $options->{"OFFSET_$AorB"};
71	0					local $/ = "\n";
72	0					$seqs[$i] = [<$seq>];
73						}
74						else {
75	0					confess "Can't handle input of type ", ref;
76						}
77						}
78
79						## Config vars
80	0					my $output;
81	0					my $output_handler = $options->{OUTPUT};
82	0					my $type = ref $output_handler ;
83	0	0 0 0 0 0	0			if ( ! defined $output_handler ) {
84	0					$output = "";
85	0 0			0		$output_handler = sub { $output .= shift };
86						}
87						elsif ( $type eq "CODE" ) {
88						## No problems, mate.
89						}
90						elsif ( $type eq "SCALAR" ) {
91	0					my $out_ref = $output_handler;
92	0 0			0		$output_handler = sub { $$out_ref .= shift };
93						}
94						elsif ( $type eq "ARRAY" ) {
95	0					my $out_ref = $output_handler;
96	0 0			0		$output_handler = sub { push @$out_ref, shift };
97						}
98						elsif ( $type eq "GLOB" \|\| UNIVERSAL::isa $output_handler, "IO::Handle" ) {
99	0					my $output_handle = $output_handler;
100	0 0			0		$output_handler = sub { print $output_handle shift };
101						}
102						else {
103	0					croak "Unrecognized output type: $type";
104						}
105
106	0					my $style = $options->{STYLE};
107	0	0				$style = "Unified" unless defined $options->{STYLE};
108	0	0				$style = "Text::Diff::$style" if exists $internal_styles{$style};
109
110	0	0				if ( ! $style->can( "hunk" ) ) {
111	0	0				eval "require $style; 1" or die $@;
112						}
113
114	0	0	0			$style = $style->new
115						if ! ref $style && $style->can( "new" );
116
117	0					my $ctx_lines = $options->{CONTEXT};
118	0	0				$ctx_lines = 3 unless defined $ctx_lines;
119	0	0				$ctx_lines = 0 if $style->isa( "Text::Diff::OldStyle" );
120
121	0					my @keygen_args = $options->{KEYGEN_ARGS}
122	0	0				? @{$options->{KEYGEN_ARGS}}
123						: ();
124
125						## State vars
126	0					my $diffs = 0; ## Number of discards this hunk
127	0					my $ctx = 0; ## Number of " " (ctx_lines) ops pushed after last diff.
128	0					my @ops; ## ops (" ", +, -) in this hunk
129	0					my $hunks = 0; ## Number of hunks
130
131						my $emit_ops = sub {
132	0	0		0		$output_handler->( $style->file_header( @seqs, $options ) )
133						unless $hunks++;
134	0					$output_handler->( $style->hunk_header( @seqs, @_, $options ) );
135	0					$output_handler->( $style->hunk ( @seqs, @_, $options ) );
136	0					$output_handler->( $style->hunk_footer( @seqs, @_, $options ) );
137	0					};
138
139						## We keep 2*ctx_lines so that if a diff occurs
140						## at 2*ctx_lines we continue to grow the hunk instead
141						## of emitting diffs and context as we go. We
142						## need to know the total length of both of the two
143						## subsequences so the line count can be printed in the
144						## header.
145	0 0 0 0			0		my $dis_a = sub {push @ops, [@_[0,1],"-"]; ++$diffs ; $ctx = 0 };
146	0 0 0 0			0		my $dis_b = sub {push @ops, [@_[0,1],"+"]; ++$diffs ; $ctx = 0 };
147
148						Algorithm::Diff::traverse_sequences(
149						@seqs,
150						{
151						MATCH => sub {
152	0			0		push @ops, [@_[0,1]," "];
153
154	0	0	0			if ( $diffs && ++$ctx > $ctx_lines * 2 ) {
155	0					$emit_ops->( [ splice @ops, 0, $#ops - $ctx_lines ] );
156	0					$ctx = $diffs = 0;
157						}
158
159						## throw away context lines that aren't needed any more
160	0	0	0			shift @ops if ! $diffs && @ops > $ctx_lines;
161						},
162	0					DISCARD_A => $dis_a,
163						DISCARD_B => $dis_b,
164						},
165						$options->{KEYGEN}, # pass in user arguments for key gen function
166						@keygen_args,
167						);
168
169	0	0				if ( $diffs ) {
170	0	0				$#ops -= $ctx - $ctx_lines if $ctx > $ctx_lines;
171	0					$emit_ops->( \@ops );
172						}
173
174	0	0				$output_handler->( $style->file_footer( @seqs, $options ) ) if $hunks;
175
176	0	0				return defined $output ? $output : $hunks;
177						}
178
179						sub _header {
180	0			0		my ( $h ) = @_;
181	0					my ( $p1, $fn1, $t1, $p2, $fn2, $t2 ) = @{$h}{
182	0					"FILENAME_PREFIX_A",
183						"FILENAME_A",
184						"MTIME_A",
185						"FILENAME_PREFIX_B",
186						"FILENAME_B",
187						"MTIME_B"
188						};
189
190						## remember to change Text::Diff::Table if this logic is tweaked.
191	0	0	0			return "" unless defined $fn1 && defined $fn2;
192
193	0	0 0				return join( "",
194						$p1, " ", $fn1, defined $t1 ? "\t" . localtime $t1 : (), "\n",
195						$p2, " ", $fn2, defined $t2 ? "\t" . localtime $t2 : (), "\n",
196						);
197						}
198
199						## _range encapsulates the building of, well, ranges. Turns out there are
200						## a few nuances.
201						sub _range {
202	0			0		my ( $ops, $a_or_b, $format ) = @_;
203
204	0					my $start = $ops->[ 0]->[$a_or_b];
205	0					my $after = $ops->[-1]->[$a_or_b];
206
207						## The sequence indexes in the lines are from before the OPCODE is
208						## executed, so we bump the last index up unless the OP indicates
209						## it didn't change.
210	0	0 0				++$after
211						unless $ops->[-1]->[OPCODE] eq ( $a_or_b == A ? "+" : "-" );
212
213						## convert from 0..n index to 1..(n+1) line number. The unless modifier
214						## handles diffs with no context, where only one file is affected. In this
215						## case $start == $after indicates an empty range, and the $start must
216						## not be incremented.
217	0					my $empty_range = $start == $after;
218	0	0				++$start unless $empty_range;
219
220						return
221	0	0 0 0	0			$start == $after
222						? $format eq "unified" && $empty_range
223						? "$start,0"
224						: $start
225						: $format eq "unified"
226						? "$start,".($after-$start+1)
227						: "$start,$after";
228						}
229
230						sub _op_to_line {
231	0			0		my ( $seqs, $op, $a_or_b, $op_prefixes ) = @_;
232
233	0					my $opcode = $op->[OPCODE];
234	0	0				return () unless defined $op_prefixes->{$opcode};
235
236	0	0				my $op_sym = defined $op->[FLAG] ? $op->[FLAG] : $opcode;
237	0					$op_sym = $op_prefixes->{$op_sym};
238	0	0				return () unless defined $op_sym;
239
240	0	0 0				$a_or_b = $op->[OPCODE] ne "+" ? 0 : 1 unless defined $a_or_b;
241	0					return ( $op_sym, $seqs->[$a_or_b][$op->[$a_or_b]] );
242						}
243
244						SCOPE: {
245						package Text::Diff::Base;
246
247						sub new {
248	0			0		my $proto = shift;
249	0		0			return bless { @_ }, ref $proto \|\| $proto;
250						}
251
252	0			0		sub file_header { return "" }
253
254	0			0		sub hunk_header { return "" }
255
256	0			0		sub hunk { return "" }
257
258	0			0		sub hunk_footer { return "" }
259
260	0			0		sub file_footer { return "" }
261						}
262
263						@Text::Diff::Unified::ISA = qw( Text::Diff::Base );
264
265						sub Text::Diff::Unified::file_header {
266	0			0		shift; ## No instance data
267	0					my $options = pop ;
268
269	0					_header(
270						{ FILENAME_PREFIX_A => "---", FILENAME_PREFIX_B => "+++", %$options }
271						);
272						}
273
274						sub Text::Diff::Unified::hunk_header {
275	0			0		shift; ## No instance data
276	0					pop; ## Ignore options
277	0					my $ops = pop;
278
279	0					return join( "",
280						"@@ -",
281						_range( $ops, A, "unified" ),
282						" +",
283						_range( $ops, B, "unified" ),
284						" @@\n",
285						);
286						}
287
288						sub Text::Diff::Unified::hunk {
289	0			0		shift; ## No instance data
290	0					pop; ## Ignore options
291	0					my $ops = pop;
292
293	0					my $prefixes = { "+" => "+", " " => " ", "-" => "-" };
294
295	0					return join "", map _op_to_line( \@_, $_, undef, $prefixes ), @$ops
296						}
297
298						@Text::Diff::Context::ISA = qw( Text::Diff::Base );
299
300						sub Text::Diff::Context::file_header {
301	0 0			0		_header { FILENAME_PREFIX_A=>"***", FILENAME_PREFIX_B=>"---", %{$_[-1]} };
302						}
303
304						sub Text::Diff::Context::hunk_header {
305	0			0		return "***************\n";
306						}
307
308						sub Text::Diff::Context::hunk {
309	0			0		shift; ## No instance data
310	0					pop; ## Ignore options
311	0					my $ops = pop;
312						## Leave the sequences in @_[0,1]
313
314	0					my $a_range = _range( $ops, A, "" );
315	0					my $b_range = _range( $ops, B, "" );
316
317						## Sigh. Gotta make sure that differences that aren't adds/deletions
318						## get prefixed with "!", and that the old opcodes are removed.
319	0					my $after;
320	0					for ( my $start = 0; $start <= $#$ops ; $start = $after ) {
321						## Scan until next difference
322	0					$after = $start + 1;
323	0					my $opcode = $ops->[$start]->[OPCODE];
324	0	0				next if $opcode eq " ";
325
326	0					my $bang_it;
327	0		0			while ( $after <= $#$ops && $ops->[$after]->[OPCODE] ne " " ) {
328	0		0			$bang_it \|\|= $ops->[$after]->[OPCODE] ne $opcode;
329	0					++$after;
330						}
331
332	0	0				if ( $bang_it ) {
333	0					for my $i ( $start..($after-1) ) {
334	0					$ops->[$i]->[FLAG] = "!";
335						}
336						}
337						}
338
339	0					my $b_prefixes = { "+" => "+ ", " " => " ", "-" => undef, "!" => "! " };
340	0					my $a_prefixes = { "+" => undef, " " => " ", "-" => "- ", "!" => "! " };
341
342	0					return join( "",
343						"* ", $a_range, " **\n",
344						map( _op_to_line( \@_, $_, A, $a_prefixes ), @$ops ),
345						"--- ", $b_range, " ----\n",
346						map( _op_to_line( \@_, $_, B, $b_prefixes ), @$ops ),
347						);
348						}
349
350						@Text::Diff::OldStyle::ISA = qw( Text::Diff::Base );
351
352						sub _op {
353	0			0		my $ops = shift;
354	0					my $op = $ops->[0]->[OPCODE];
355	0	0				$op = "c" if grep $_->[OPCODE] ne $op, @$ops;
356	0	0				$op = "a" if $op eq "+";
357	0	0				$op = "d" if $op eq "-";
358	0					return $op;
359						}
360
361						sub Text::Diff::OldStyle::hunk_header {
362	0			0		shift; ## No instance data
363	0					pop; ## ignore options
364	0					my $ops = pop;
365
366	0					my $op = _op $ops;
367
368	0					return join "", _range( $ops, A, "" ), $op, _range( $ops, B, "" ), "\n";
369						}
370
371						sub Text::Diff::OldStyle::hunk {
372	0			0		shift; ## No instance data
373	0					pop; ## ignore options
374	0					my $ops = pop;
375						## Leave the sequences in @_[0,1]
376
377	0					my $a_prefixes = { "+" => undef, " " => undef, "-" => "< " };
378	0					my $b_prefixes = { "+" => "> ", " " => undef, "-" => undef };
379
380	0					my $op = _op $ops;
381
382	0	0				return join( "",
383						map( _op_to_line( \@_, $_, A, $a_prefixes ), @$ops ),
384						$op eq "c" ? "---\n" : (),
385						map( _op_to_line( \@_, $_, B, $b_prefixes ), @$ops ),
386						);
387						}
388
389						1;
390