File Coverage

File:local/lib/perl5/Sort/Naturally.pm
Coverage:13.2%

linestmtbrancondsubtimecode
1
2require 5;
3package Sort::Naturally;  # Time-stamp: "2004-12-29 18:30:03 AST"
4$VERSION = '1.03';
5@EXPORT = ('nsort', 'ncmp');
6require Exporter;
7@ISA = ('Exporter');
8
9
2
2
2
4
2
33
use strict;
10
2
2
2
222
193
6
use locale;
11
2
2
2
25
1
6
use integer;
12
13#-----------------------------------------------------------------------------
14# constants:
15
2
55
BEGIN { *DEBUG = sub () {0} unless defined &DEBUG }
16
17
2
2
2
4
0
16
use Config ();
18BEGIN {
19  # Make a constant such that if a whole-number string is that long
20  #  or shorter, we KNOW it's treatable as an integer
21
2
2
2
3
1
4
  no integer;
22
2
116
  my $x = length(256 ** $Config::Config{'intsize'} / 2) - 1;
23
2
5
  die "Crazy intsize: <$Config::Config{'intsize'}>" if $x < 4;
24
2
40
  eval 'sub MAX_INT_SIZE () {' . $x . '}';
25
2
5
  die $@ if $@;
26
2
202
  print "intsize $Config::Config{'intsize'} => MAX_INT_SIZE $x\n" if DEBUG;
27}
28
29sub X_FIRST () {-1}
30sub Y_FIRST () { 1}
31
32my @ORD = ('same', 'swap', 'asis');
33
34#-----------------------------------------------------------------------------
35# For lack of a preprocessor:
36
37my($code, $guts);
38$guts = <<'EOGUTS';  # This is the guts of both ncmp and nsort:
39
40    if($x eq $y) {
41      # trap this expensive case first, and then fall thru to tiebreaker
42      $rv = 0;
43
44    # Convoluted hack to get numerics to sort first, at string start:
45    } elsif($x =~ m/^\d/s) {
46      if($y =~ m/^\d/s) {
47        $rv = 0;    # fall thru to normal comparison for the two numbers
48      } else {
49        $rv = X_FIRST;
50        DEBUG > 1 and print "Numeric-initial $x trumps letter-initial $y\n";
51      }
52    } elsif($y =~ m/^\d/s) {
53      $rv = Y_FIRST;
54      DEBUG > 1 and print "Numeric-initial $y trumps letter-initial $x\n";
55    } else {
56      $rv = 0;
57    }
58
59    unless($rv) {
60      # Normal case:
61      $rv = 0;
62      DEBUG and print "<$x> and <$y> compared...\n";
63
64     Consideration:
65      while(length $x and length $y) {
66
67        DEBUG > 2 and print " <$x> and <$y>...\n";
68
69        # First, non-numeric comparison:
70        $x2 = ($x =~ m/^(\D+)/s) ? length($1) : 0;
71        $y2 = ($y =~ m/^(\D+)/s) ? length($1) : 0;
72        # Now make x2 the min length of the two:
73        $x2 = $y2 if $x2 > $y2;
74        if($x2) {
75          DEBUG > 1 and printf " <%s> and <%s> lexically for length $x2...\n",
76            substr($x,0,$x2), substr($y,0,$x2);
77          do {
78           my $i = substr($x,0,$x2);
79           my $j = substr($y,0,$x2);
80           my $sv = $i cmp $j;
81           print "SCREAM! on <$i><$j> -- $sv != $rv \n" unless $rv == $sv;
82           last;
83          }
84
85
86           if $rv =
87           # The ''. things here force a copy that seems to work around a
88           #  mysterious intermittent bug that 'use locale' provokes in
89           #  many versions of Perl.
90                   $cmp
91                   ? $cmp->(substr($x,0,$x2) . '',
92                            substr($y,0,$x2) . '',
93                           )
94                   :
95                   scalar(( substr($x,0,$x2) . '' ) cmp
96                          ( substr($y,0,$x2) . '' )
97                          )
98          ;
99          # otherwise trim and keep going:
100          substr($x,0,$x2) = '';
101          substr($y,0,$x2) = '';
102        }
103
104        # Now numeric:
105        #  (actually just using $x2 and $y2 as scratch)
106
107        if( $x =~ s/^(\d+)//s ) {
108          $x2 = $1;
109          if( $y =~ s/^(\d+)//s ) {
110            # We have two numbers here.
111            DEBUG > 1 and print " <$x2> and <$1> numerically\n";
112            if(length($x2) < MAX_INT_SIZE and length($1) < MAX_INT_SIZE) {
113              # small numbers: we can compare happily
114              last if $rv = $x2 <=> $1;
115            } else {
116              # ARBITRARILY large integers!
117
118              # This saves on loss of precision that could happen
119              #  with actual stringification.
120              # Also, I sense that very large numbers aren't too
121              #  terribly common in sort data.
122
123              # trim leading 0's:
124              ($y2 = $1) =~ s/^0+//s;
125              $x2 =~ s/^0+//s;
126              print "   Treating $x2 and $y2 as bigint\n" if DEBUG;
127
128              no locale; # we want the dumb cmp back.
129              last if $rv = (
130                 # works only for non-negative whole numbers:
131                 length($x2) <=> length($y2)
132                   # the longer the numeral, the larger the value
133                 or $x2 cmp $y2
134                   # between equals, compare lexically!!  amazing but true.
135              );
136            }
137          } else {
138            # X is numeric but Y isn't
139            $rv = Y_FIRST;
140            last;
141          }
142        } elsif( $y =~ s/^\d+//s ) {  # we don't need to capture the substring
143          $rv = X_FIRST;
144          last;
145        }
146         # else one of them is 0-length.
147
148       # end-while
149      }
150    }
151EOGUTS
152
153sub maker {
154  my $code = $_[0];
155  $code =~ s/~COMPARATOR~/$guts/g || die "Can't find ~COMPARATOR~";
156
2
2
2
2
2
2
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
5
1
4
5
2
3
  eval $code;
157  die $@ if $@;
158}
159
160##############################################################################
161
162maker(<<'EONSORT');
163sub nsort {
164  # get options:
165  my($cmp, $lc);
166  ($cmp,$lc) = @{shift @_} if @_ and ref($_[0]) eq 'ARRAY';
167
168  return @_ unless @_ > 1 or wantarray; # be clever
169
170  my($x, $x2, $y, $y2, $rv);  # scratch vars
171
172  # We use a Schwartzian xform to memoize the lc'ing and \W-removal
173
174  map $_->[0],
175  sort {
176    if($a->[0] eq $b->[0]) { 0 }   # trap this expensive case
177    else {
178
179    $x = $a->[1];
180    $y = $b->[1];
181
182~COMPARATOR~
183
184    # Tiebreakers...
185    DEBUG > 1 and print " -<${$a}[0]> cmp <${$b}[0]> is $rv ($ORD[$rv])\n";
186    $rv ||= (length($x) <=> length($y))  # shorter is always first
187        ||  ($cmp and $cmp->($x,$y) || $cmp->($a->[0], $b->[0]))
188        ||  ($x      cmp $y     )
189        ||  ($a->[0] cmp $b->[0])
190    ;
191
192    DEBUG > 1 and print "  <${$a}[0]> cmp <${$b}[0]> is $rv ($ORD[$rv])\n";
193    $rv;
194  }}
195
196  map {;
197    $x = $lc ? $lc->($_) : lc($_); # x as scratch
198    $x =~ s/\W+//s;
199    [$_, $x];
200  }
201  @_
202}
203EONSORT
204
205#-----------------------------------------------------------------------------
206maker(<<'EONCMP');
207sub ncmp {
208  # The guts are basically the same as above...
209
210  # get options:
211  my($cmp, $lc);
212  ($cmp,$lc) = @{shift @_} if @_ and ref($_[0]) eq 'ARRAY';
213
214  if(@_ == 0) {
215    @_ = ($a, $b); # bit of a hack!
216    DEBUG > 1 and print "Hacking in <$a><$b>\n";
217  } elsif(@_ != 2) {
218    require Carp;
219    Carp::croak("Not enough options to ncmp!");
220  }
221  my($a,$b) = @_;
222  my($x, $x2, $y, $y2, $rv);  # scratch vars
223
224  DEBUG > 1 and print "ncmp args <$a><$b>\n";
225  if($a eq $b) { # trap this expensive case
226    0;
227  } else {
228    $x = ($lc ? $lc->($a) : lc($a));
229    $x =~ s/\W+//s;
230    $y = ($lc ? $lc->($b) : lc($b));
231    $y =~ s/\W+//s;
232
233~COMPARATOR~
234
235
236    # Tiebreakers...
237    DEBUG > 1 and print " -<$a> cmp <$b> is $rv ($ORD[$rv])\n";
238    $rv ||= (length($x) <=> length($y))  # shorter is always first
239        ||  ($cmp and $cmp->($x,$y) || $cmp->($a,$b))
240        ||  ($x cmp $y)
241        ||  ($a cmp $b)
242    ;
243
244    DEBUG > 1 and print "  <$a> cmp <$b> is $rv\n";
245    $rv;
246  }
247}
248EONCMP
249
250# clean up:
251undef $guts;
252undef &maker;
253
254#-----------------------------------------------------------------------------
2551;
256
257############### END OF MAIN SOURCE ###########################################