oct-sort.cc Source File

00001 /*
00002 Copyright (C) 2003 David Bateman
00003 
00004 This file is part of Octave.
00005 
00006 Octave is free software; you can redistribute it and/or modify it
00007 under the terms of the GNU General Public License as published by the
00008 Free Software Foundation; either version 2, or (at your option) any
00009 later version.
00010 
00011 Octave is distributed in the hope that it will be useful, but WITHOUT
00012 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00013 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
00014 for more details.
00015 
00016 You should have received a copy of the GNU General Public License
00017 along with Octave; see the file COPYING.  If not, write to the Free
00018 Software Foundation, 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
00019 
00020 Code stolen in large part from Python's, listobject.c, which itself had
00021 no license header. However, thanks to Tim Peters for the parts of the
00022 code I ripped-off.
00023 
00024 As required in the Python license the short description of the changes
00025 made are
00026 
00027 * convert the sorting code in listobject.cc into a generic class, 
00028   replacing PyObject* with the type of the class T.
00029 
00030 The Python license is
00031 
00032   PSF LICENSE AGREEMENT FOR PYTHON 2.3
00033   --------------------------------------
00034 
00035   1. This LICENSE AGREEMENT is between the Python Software Foundation
00036   ("PSF"), and the Individual or Organization ("Licensee") accessing and
00037   otherwise using Python 2.3 software in source or binary form and its
00038   associated documentation.
00039 
00040   2. Subject to the terms and conditions of this License Agreement, PSF
00041   hereby grants Licensee a nonexclusive, royalty-free, world-wide
00042   license to reproduce, analyze, test, perform and/or display publicly,
00043   prepare derivative works, distribute, and otherwise use Python 2.3
00044   alone or in any derivative version, provided, however, that PSF's
00045   License Agreement and PSF's notice of copyright, i.e., "Copyright (c)
00046   2001, 2002, 2003 Python Software Foundation; All Rights Reserved" are
00047   retained in Python 2.3 alone or in any derivative version prepared by
00048   Licensee.
00049 
00050   3. In the event Licensee prepares a derivative work that is based on
00051   or incorporates Python 2.3 or any part thereof, and wants to make
00052   the derivative work available to others as provided herein, then
00053   Licensee hereby agrees to include in any such work a brief summary of
00054   the changes made to Python 2.3.
00055 
00056   4. PSF is making Python 2.3 available to Licensee on an "AS IS"
00057   basis.  PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
00058   IMPLIED.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
00059   DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
00060   FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 2.3 WILL NOT
00061   INFRINGE ANY THIRD PARTY RIGHTS.
00062 
00063   5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
00064   2.3 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
00065   A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 2.3,
00066   OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
00067 
00068   6. This License Agreement will automatically terminate upon a material
00069   breach of its terms and conditions.
00070 
00071   7. Nothing in this License Agreement shall be deemed to create any
00072   relationship of agency, partnership, or joint venture between PSF and
00073   Licensee.  This License Agreement does not grant permission to use PSF
00074   trademarks or trade name in a trademark sense to endorse or promote
00075   products or services of Licensee, or any third party.
00076 
00077   8. By copying, installing or otherwise using Python 2.3, Licensee
00078   agrees to be bound by the terms and conditions of this License
00079   Agreement.
00080 */
00081 
00082 #ifdef HAVE_CONFIG_H
00083 #include <config.h>
00084 #endif
00085 
00086 #include "lo-mappers.h"
00087 #include "quit.h"
00088 #include "oct-sort.h"
00089 
00090 #define IFLT(a,b)  if (compare == NULL ? ((a) < (b)) : compare ((a), (b)))
00091 
00092 template <class T>
00093 octave_sort<T>::octave_sort (void) : compare (NULL) 
00094 { 
00095   merge_init (); 
00096   merge_getmem (1024);
00097 }
00098 
00099 template <class T>
00100 octave_sort<T>::octave_sort (bool (*comp) (T, T)) : compare (comp) 
00101 { 
00102   merge_init (); 
00103   merge_getmem (1024);
00104 }
00105   
00106 /* Reverse a slice of a list in place, from lo up to (exclusive) hi. */
00107 template <class T>
00108 void
00109 octave_sort<T>::reverse_slice(T *lo, T *hi)
00110 {
00111   --hi;
00112   while (lo < hi) 
00113     {
00114       T t = *lo;
00115       *lo = *hi;
00116       *hi = t;
00117       ++lo;
00118       --hi;
00119     }
00120 }
00121 
00122 template <class T>
00123 void
00124 octave_sort<T>::binarysort (T *lo, T *hi, T *start)
00125 {
00126   register T *l, *p, *r;
00127   register T pivot;
00128 
00129   if (lo == start)
00130     ++start;
00131 
00132   for (; start < hi; ++start) 
00133     {
00134       /* set l to where *start belongs */
00135       l = lo;
00136       r = start;
00137       pivot = *r;
00138       /* Invariants:
00139        * pivot >= all in [lo, l).
00140        * pivot  < all in [r, start).
00141        * The second is vacuously true at the start.
00142        */
00143       do 
00144         {
00145           p = l + ((r - l) >> 1);
00146           IFLT (pivot, *p)
00147             r = p;
00148           else
00149             l = p+1;
00150         } 
00151       while (l < r);
00152       /* The invariants still hold, so pivot >= all in [lo, l) and
00153          pivot < all in [l, start), so pivot belongs at l.  Note
00154          that if there are elements equal to pivot, l points to the
00155          first slot after them -- that's why this sort is stable.
00156          Slide over to make room.
00157          Caution: using memmove is much slower under MSVC 5;
00158          we're not usually moving many slots. */
00159       for (p = start; p > l; --p)
00160         *p = *(p-1);
00161       *l = pivot;
00162     }
00163 
00164   return;
00165 }
00166 
00167 /*
00168 Return the length of the run beginning at lo, in the slice [lo, hi).  lo < hi
00169 is required on entry.  "A run" is the longest ascending sequence, with
00170 
00171     lo[0] <= lo[1] <= lo[2] <= ...
00172 
00173 or the longest descending sequence, with
00174 
00175     lo[0] > lo[1] > lo[2] > ...
00176 
00177 Boolean *descending is set to 0 in the former case, or to 1 in the latter.
00178 For its intended use in a stable mergesort, the strictness of the defn of
00179 "descending" is needed so that the caller can safely reverse a descending
00180 sequence without violating stability (strict > ensures there are no equal
00181 elements to get out of order).
00182 
00183 Returns -1 in case of error.
00184 */
00185 template <class T>
00186 int
00187 octave_sort<T>::count_run(T *lo, T *hi, int *descending)
00188 {
00189   int n;
00190 
00191   *descending = 0;
00192   ++lo;
00193   if (lo == hi)
00194     return 1;
00195 
00196   n = 2;
00197 
00198   IFLT (*lo, *(lo-1))
00199     {
00200       *descending = 1;
00201       for (lo = lo+1; lo < hi; ++lo, ++n) 
00202         {
00203           IFLT (*lo, *(lo-1))
00204             ;
00205           else
00206             break;
00207         }
00208     }
00209   else 
00210     {
00211       for (lo = lo+1; lo < hi; ++lo, ++n) 
00212         {
00213           IFLT (*lo, *(lo-1))
00214             break;
00215         }
00216     }
00217 
00218   return n;
00219 }
00220 
00221 /*
00222 Locate the proper position of key in a sorted vector; if the vector contains
00223 an element equal to key, return the position immediately to the left of
00224 the leftmost equal element.  [gallop_right() does the same except returns
00225 the position to the right of the rightmost equal element (if any).]
00226 
00227 "a" is a sorted vector with n elements, starting at a[0].  n must be > 0.
00228 
00229 "hint" is an index at which to begin the search, 0 <= hint < n.  The closer
00230 hint is to the final result, the faster this runs.
00231 
00232 The return value is the int k in 0..n such that
00233 
00234     a[k-1] < key <= a[k]
00235 
00236 pretending that *(a-1) is minus infinity and a[n] is plus infinity.  IOW,
00237 key belongs at index k; or, IOW, the first k elements of a should precede
00238 key, and the last n-k should follow key.
00239 
00240 Returns -1 on error.  See listsort.txt for info on the method.
00241 */
00242 template <class T>
00243 int
00244 octave_sort<T>::gallop_left(T key, T *a, int n, int hint)
00245 {
00246   int ofs;
00247   int lastofs;
00248   int k;
00249 
00250   a += hint;
00251   lastofs = 0;
00252   ofs = 1;
00253   IFLT (*a, key)
00254     {
00255       /* a[hint] < key -- gallop right, until
00256        * a[hint + lastofs] < key <= a[hint + ofs]
00257        */
00258       const int maxofs = n - hint;      /* &a[n-1] is highest */
00259       while (ofs < maxofs) 
00260         {
00261           IFLT (a[ofs], key)
00262             {
00263               lastofs = ofs;
00264               ofs = (ofs << 1) + 1;
00265               if (ofs <= 0)     /* int overflow */
00266                 ofs = maxofs;
00267             }
00268           else  /* key <= a[hint + ofs] */
00269             break;
00270         }
00271       if (ofs > maxofs)
00272         ofs = maxofs;
00273       /* Translate back to offsets relative to &a[0]. */
00274       lastofs += hint;
00275       ofs += hint;
00276     }
00277   else 
00278     {
00279       /* key <= a[hint] -- gallop left, until
00280        * a[hint - ofs] < key <= a[hint - lastofs]
00281        */
00282       const int maxofs = hint + 1;      /* &a[0] is lowest */
00283       while (ofs < maxofs) 
00284         {
00285           IFLT (*(a-ofs), key)
00286             break;
00287           /* key <= a[hint - ofs] */
00288           lastofs = ofs;
00289           ofs = (ofs << 1) + 1;
00290           if (ofs <= 0) /* int overflow */
00291             ofs = maxofs;
00292         }
00293       if (ofs > maxofs)
00294         ofs = maxofs;
00295       /* Translate back to positive offsets relative to &a[0]. */
00296       k = lastofs;
00297       lastofs = hint - ofs;
00298       ofs = hint - k;
00299     }
00300   a -= hint;
00301 
00302   /* Now a[lastofs] < key <= a[ofs], so key belongs somewhere to the
00303    * right of lastofs but no farther right than ofs.  Do a binary
00304    * search, with invariant a[lastofs-1] < key <= a[ofs].
00305    */
00306   ++lastofs;
00307   while (lastofs < ofs) 
00308     {
00309       int m = lastofs + ((ofs - lastofs) >> 1);
00310 
00311       IFLT (a[m], key)
00312         lastofs = m+1;  /* a[m] < key */
00313       else
00314         ofs = m;        /* key <= a[m] */
00315     }
00316   return ofs;
00317 }
00318 
00319 /*
00320 Exactly like gallop_left(), except that if key already exists in a[0:n],
00321 finds the position immediately to the right of the rightmost equal value.
00322 
00323 The return value is the int k in 0..n such that
00324 
00325     a[k-1] <= key < a[k]
00326 
00327 or -1 if error.
00328 
00329 The code duplication is massive, but this is enough different given that
00330 we're sticking to "<" comparisons that it's much harder to follow if
00331 written as one routine with yet another "left or right?" flag.
00332 */
00333 template <class T>
00334 int
00335 octave_sort<T>::gallop_right(T key, T *a, int n, int hint)
00336 {
00337   int ofs;
00338   int lastofs;
00339   int k;
00340 
00341   a += hint;
00342   lastofs = 0;
00343   ofs = 1;
00344   IFLT (key, *a)
00345     {
00346       /* key < a[hint] -- gallop left, until
00347        * a[hint - ofs] <= key < a[hint - lastofs]
00348        */
00349       const int maxofs = hint + 1;      /* &a[0] is lowest */
00350       while (ofs < maxofs) 
00351         {
00352           IFLT (key, *(a-ofs))
00353             {
00354               lastofs = ofs;
00355               ofs = (ofs << 1) + 1;
00356               if (ofs <= 0)     /* int overflow */
00357                 ofs = maxofs;
00358             }
00359           else  /* a[hint - ofs] <= key */
00360             break;
00361         }
00362       if (ofs > maxofs)
00363         ofs = maxofs;
00364       /* Translate back to positive offsets relative to &a[0]. */
00365       k = lastofs;
00366       lastofs = hint - ofs;
00367       ofs = hint - k;
00368     }
00369   else 
00370     {
00371       /* a[hint] <= key -- gallop right, until
00372        * a[hint + lastofs] <= key < a[hint + ofs]
00373        */
00374       const int maxofs = n - hint;      /* &a[n-1] is highest */
00375       while (ofs < maxofs) 
00376         {
00377           IFLT (key, a[ofs])
00378             break;
00379           /* a[hint + ofs] <= key */
00380           lastofs = ofs;
00381           ofs = (ofs << 1) + 1;
00382           if (ofs <= 0) /* int overflow */
00383             ofs = maxofs;
00384         }
00385       if (ofs > maxofs)
00386         ofs = maxofs;
00387       /* Translate back to offsets relative to &a[0]. */
00388       lastofs += hint;
00389       ofs += hint;
00390     }
00391   a -= hint;
00392 
00393   /* Now a[lastofs] <= key < a[ofs], so key belongs somewhere to the
00394    * right of lastofs but no farther right than ofs.  Do a binary
00395    * search, with invariant a[lastofs-1] <= key < a[ofs].
00396    */
00397   ++lastofs;
00398   while (lastofs < ofs) 
00399     {
00400       int m = lastofs + ((ofs - lastofs) >> 1);
00401 
00402       IFLT (key, a[m])
00403         ofs = m;        /* key < a[m] */
00404       else
00405         lastofs = m+1;  /* a[m] <= key */
00406     }
00407   return ofs;
00408 }
00409 
00410 /* Conceptually a MergeState's constructor. */
00411 template <class T>
00412 void
00413 octave_sort<T>::merge_init(void)
00414 {
00415   ms.a = NULL;
00416   ms.alloced = 0;
00417   ms.n = 0;
00418   ms.min_gallop = MIN_GALLOP;
00419 }
00420 
00421 /* Free all the temp memory owned by the MergeState.  This must be called
00422  * when you're done with a MergeState, and may be called before then if
00423  * you want to free the temp memory early.
00424  */
00425 template <class T>
00426 void
00427 octave_sort<T>::merge_freemem(void)
00428 {
00429   if (ms.a)
00430     free (ms.a);
00431   ms.alloced = 0;
00432   ms.a = NULL;
00433 }
00434 
00435 static inline int
00436 roundupsize(int n)
00437 {
00438   unsigned int nbits = 3;
00439   unsigned int n2 = (unsigned int)n >> 8;
00440 
00441   /* Round up:
00442    * If n <       256, to a multiple of        8.
00443    * If n <      2048, to a multiple of       64.
00444    * If n <     16384, to a multiple of      512.
00445    * If n <    131072, to a multiple of     4096.
00446    * If n <   1048576, to a multiple of    32768.
00447    * If n <   8388608, to a multiple of   262144.
00448    * If n <  67108864, to a multiple of  2097152.
00449    * If n < 536870912, to a multiple of 16777216.
00450    * ...
00451    * If n < 2**(5+3*i), to a multiple of 2**(3*i).
00452    *
00453    * This over-allocates proportional to the list size, making room
00454    * for additional growth.  The over-allocation is mild, but is
00455    * enough to give linear-time amortized behavior over a long
00456    * sequence of appends() in the presence of a poorly-performing
00457    * system realloc() (which is a reality, e.g., across all flavors
00458    * of Windows, with Win9x behavior being particularly bad -- and
00459    * we've still got address space fragmentation problems on Win9x
00460    * even with this scheme, although it requires much longer lists to
00461    * provoke them than it used to).
00462    */
00463   while (n2) {
00464     n2 >>= 3;
00465     nbits += 3;
00466   }
00467   return ((n >> nbits) + 1) << nbits;
00468 }
00469 
00470 /* Ensure enough temp memory for 'need' array slots is available.
00471  * Returns 0 on success and -1 if the memory can't be gotten.
00472  */
00473 template <class T>
00474 int
00475 octave_sort<T>::merge_getmem(int need)
00476 {
00477   if (need <= ms.alloced)
00478     return 0;
00479 
00480   need = roundupsize(need); 
00481   /* Don't realloc!  That can cost cycles to copy the old data, but
00482    * we don't care what's in the block.
00483    */
00484   merge_freemem( );
00485   ms.a = (T *) malloc (need * sizeof (T));
00486   if (ms.a) {
00487     ms.alloced = need;
00488     return 0;
00489   }
00490   merge_freemem( );     /* reset to sane state */
00491   return -1;
00492 }
00493 
00494 #define MERGE_GETMEM(NEED) ((NEED) <= ms.alloced ? 0 :  \
00495                                 merge_getmem(NEED))
00496 
00497 /* Merge the na elements starting at pa with the nb elements starting at pb
00498  * in a stable way, in-place.  na and nb must be > 0, and pa + na == pb.
00499  * Must also have that *pb < *pa, that pa[na-1] belongs at the end of the
00500  * merge, and should have na <= nb.  See listsort.txt for more info.
00501  * Return 0 if successful, -1 if error.
00502  */
00503 template <class T>
00504 int
00505 octave_sort<T>::merge_lo(T *pa, int na, T *pb, int nb)
00506 {
00507   int k;
00508   T *dest;
00509   int result = -1;      /* guilty until proved innocent */
00510   int min_gallop = ms.min_gallop;
00511 
00512   if (MERGE_GETMEM(na) < 0)
00513     return -1;
00514   memcpy(ms.a, pa, na * sizeof(T));
00515   dest = pa;
00516   pa = ms.a;
00517 
00518   *dest++ = *pb++;
00519   --nb;
00520   if (nb == 0)
00521     goto Succeed;
00522   if (na == 1)
00523     goto CopyB;
00524 
00525   for (;;) {
00526     int acount = 0;     /* # of times A won in a row */
00527     int bcount = 0;     /* # of times B won in a row */
00528 
00529     /* Do the straightforward thing until (if ever) one run
00530      * appears to win consistently.
00531      */
00532     for (;;) {
00533 
00534       IFLT (*pb, *pa)
00535         {
00536           *dest++ = *pb++;
00537           ++bcount;
00538           acount = 0;
00539           --nb;
00540           if (nb == 0)
00541             goto Succeed;
00542           if (bcount >= min_gallop)
00543             break;
00544         }
00545       else 
00546         {
00547           *dest++ = *pa++;
00548           ++acount;
00549           bcount = 0;
00550           --na;
00551           if (na == 1)
00552             goto CopyB;
00553           if (acount >= min_gallop)
00554             break;
00555         }
00556     }
00557 
00558     /* One run is winning so consistently that galloping may
00559      * be a huge win.  So try that, and continue galloping until
00560      * (if ever) neither run appears to be winning consistently
00561      * anymore.
00562      */
00563     ++min_gallop;
00564     do 
00565       {
00566         min_gallop -= min_gallop > 1;
00567         ms.min_gallop = min_gallop;
00568         k = gallop_right(*pb, pa, na, 0);
00569         acount = k;
00570         if (k) 
00571           {
00572             if (k < 0)
00573               goto Fail;
00574             memcpy(dest, pa, k * sizeof(T));
00575             dest += k;
00576             pa += k;
00577             na -= k;
00578             if (na == 1)
00579               goto CopyB;
00580             /* na==0 is impossible now if the comparison
00581              * function is consistent, but we can't assume
00582              * that it is.
00583              */
00584             if (na == 0)
00585               goto Succeed;
00586           }
00587         *dest++ = *pb++;
00588         --nb;
00589         if (nb == 0)
00590           goto Succeed;
00591 
00592         k = gallop_left(*pa, pb, nb, 0);
00593         bcount = k;
00594         if (k) {
00595           if (k < 0)
00596             goto Fail;
00597           memmove(dest, pb, k * sizeof(T));
00598           dest += k;
00599           pb += k;
00600           nb -= k;
00601           if (nb == 0)
00602             goto Succeed;
00603         }
00604         *dest++ = *pa++;
00605         --na;
00606         if (na == 1)
00607           goto CopyB;
00608       } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
00609     ++min_gallop;       /* penalize it for leaving galloping mode */
00610     ms.min_gallop = min_gallop;
00611   }
00612  Succeed:
00613   result = 0;
00614  Fail:
00615   if (na)
00616     memcpy(dest, pa, na * sizeof(T));
00617   return result;
00618  CopyB:
00619   /* The last element of pa belongs at the end of the merge. */
00620   memmove(dest, pb, nb * sizeof(T));
00621   dest[nb] = *pa;
00622   return 0;
00623 }
00624 
00625 /* Merge the na elements starting at pa with the nb elements starting at pb
00626  * in a stable way, in-place.  na and nb must be > 0, and pa + na == pb.
00627  * Must also have that *pb < *pa, that pa[na-1] belongs at the end of the
00628  * merge, and should have na >= nb.  See listsort.txt for more info.
00629  * Return 0 if successful, -1 if error.
00630  */
00631 template <class T>
00632 int
00633 octave_sort<T>::merge_hi(T *pa, int na, T *pb, int nb)
00634 {
00635   int k;
00636   T *dest;
00637   int result = -1;      /* guilty until proved innocent */
00638   T *basea;
00639   T *baseb;
00640   int min_gallop = ms.min_gallop;
00641 
00642   if (MERGE_GETMEM(nb) < 0)
00643     return -1;
00644   dest = pb + nb - 1;
00645   memcpy(ms.a, pb, nb * sizeof(T));
00646   basea = pa;
00647   baseb = ms.a;
00648   pb = ms.a + nb - 1;
00649   pa += na - 1;
00650 
00651   *dest-- = *pa--;
00652   --na;
00653   if (na == 0)
00654     goto Succeed;
00655   if (nb == 1)
00656     goto CopyA;
00657 
00658   for (;;) 
00659     {
00660       int acount = 0;   /* # of times A won in a row */
00661       int bcount = 0;   /* # of times B won in a row */
00662 
00663       /* Do the straightforward thing until (if ever) one run
00664        * appears to win consistently.
00665        */
00666       for (;;) 
00667         {
00668           IFLT (*pb, *pa)
00669             {
00670               *dest-- = *pa--;
00671               ++acount;
00672               bcount = 0;
00673               --na;
00674               if (na == 0)
00675                 goto Succeed;
00676               if (acount >= min_gallop)
00677                 break;
00678             }
00679           else 
00680             {
00681               *dest-- = *pb--;
00682               ++bcount;
00683               acount = 0;
00684               --nb;
00685               if (nb == 1)
00686                 goto CopyA;
00687               if (bcount >= min_gallop)
00688                 break;
00689             }
00690         }
00691 
00692       /* One run is winning so consistently that galloping may
00693        * be a huge win.  So try that, and continue galloping until
00694        * (if ever) neither run appears to be winning consistently
00695        * anymore.
00696        */
00697       ++min_gallop;
00698       do 
00699         {
00700           min_gallop -= min_gallop > 1;
00701           ms.min_gallop = min_gallop;
00702           k = gallop_right(*pb, basea, na, na-1);
00703           if (k < 0)
00704             goto Fail;
00705           k = na - k;
00706           acount = k;
00707           if (k) 
00708             {
00709               dest -= k;
00710               pa -= k;
00711               memmove(dest+1, pa+1, k * sizeof(T ));
00712               na -= k;
00713               if (na == 0)
00714                 goto Succeed;
00715             }
00716           *dest-- = *pb--;
00717           --nb;
00718           if (nb == 1)
00719             goto CopyA;
00720 
00721           k = gallop_left(*pa, baseb, nb, nb-1);
00722           if (k < 0)
00723             goto Fail;
00724           k = nb - k;
00725           bcount = k;
00726           if (k) 
00727             {
00728               dest -= k;
00729               pb -= k;
00730               memcpy(dest+1, pb+1, k * sizeof(T));
00731               nb -= k;
00732               if (nb == 1)
00733                 goto CopyA;
00734               /* nb==0 is impossible now if the comparison
00735                * function is consistent, but we can't assume
00736                * that it is.
00737                */
00738               if (nb == 0)
00739                 goto Succeed;
00740             }
00741           *dest-- = *pa--;
00742           --na;
00743           if (na == 0)
00744             goto Succeed;
00745         } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
00746       ++min_gallop;     /* penalize it for leaving galloping mode */
00747       ms.min_gallop = min_gallop;
00748     }
00749 Succeed:
00750   result = 0;
00751 Fail:
00752   if (nb)
00753     memcpy(dest-(nb-1), baseb, nb * sizeof(T));
00754   return result;
00755 CopyA:
00756   /* The first element of pb belongs at the front of the merge. */
00757   dest -= na;
00758   pa -= na;
00759   memmove(dest+1, pa+1, na * sizeof(T));
00760   *dest = *pb;
00761   return 0;
00762 }
00763 
00764 /* Merge the two runs at stack indices i and i+1.
00765  * Returns 0 on success, -1 on error.
00766  */
00767 template <class T>
00768 int
00769 octave_sort<T>::merge_at(int i)
00770 {
00771   T *pa, *pb;
00772   int na, nb;
00773   int k;
00774 
00775   pa = ms.pending[i].base;
00776   na = ms.pending[i].len;
00777   pb = ms.pending[i+1].base;
00778   nb = ms.pending[i+1].len;
00779 
00780   /* Record the length of the combined runs; if i is the 3rd-last
00781    * run now, also slide over the last run (which isn't involved
00782    * in this merge).  The current run i+1 goes away in any case.
00783    */
00784   ms.pending[i].len = na + nb;
00785   if (i == ms.n - 3)
00786     ms.pending[i+1] = ms.pending[i+2];
00787   --ms.n;
00788 
00789   /* Where does b start in a?  Elements in a before that can be
00790    * ignored (already in place).
00791    */
00792   k = gallop_right(*pb, pa, na, 0);
00793   if (k < 0)
00794     return -1;
00795   pa += k;
00796   na -= k;
00797   if (na == 0)
00798     return 0;
00799 
00800   /* Where does a end in b?  Elements in b after that can be
00801    * ignored (already in place).
00802    */
00803   nb = gallop_left(pa[na-1], pb, nb, nb-1);
00804   if (nb <= 0)
00805     return nb;
00806 
00807   /* Merge what remains of the runs, using a temp array with
00808    * min(na, nb) elements.
00809    */
00810   if (na <= nb)
00811     return merge_lo(pa, na, pb, nb);
00812   else
00813     return merge_hi(pa, na, pb, nb);
00814 }
00815 
00816 /* Examine the stack of runs waiting to be merged, merging adjacent runs
00817  * until the stack invariants are re-established:
00818  *
00819  * 1. len[-3] > len[-2] + len[-1]
00820  * 2. len[-2] > len[-1]
00821  *
00822  * See listsort.txt for more info.
00823  *
00824  * Returns 0 on success, -1 on error.
00825  */
00826 template <class T>
00827 int
00828 octave_sort<T>::merge_collapse(void)
00829 {
00830   struct s_slice *p = ms.pending;
00831 
00832   while (ms.n > 1) 
00833     {
00834       int n = ms.n - 2;
00835       if (n > 0 && p[n-1].len <= p[n].len + p[n+1].len) 
00836         {
00837           if (p[n-1].len < p[n+1].len)
00838             --n;
00839           if (merge_at(n) < 0)
00840             return -1;
00841         }
00842       else if (p[n].len <= p[n+1].len) 
00843         {
00844           if (merge_at(n) < 0)
00845             return -1;
00846         }
00847       else
00848         break;
00849     }
00850   return 0;
00851 }
00852 
00853 /* Regardless of invariants, merge all runs on the stack until only one
00854  * remains.  This is used at the end of the mergesort.
00855  *
00856  * Returns 0 on success, -1 on error.
00857  */
00858 template <class T>
00859 int
00860 octave_sort<T>::merge_force_collapse(void)
00861 {
00862   struct s_slice *p = ms.pending;
00863 
00864   while (ms.n > 1) 
00865     {
00866       int n = ms.n - 2;
00867       if (n > 0 && p[n-1].len < p[n+1].len)
00868         --n;
00869       if (merge_at(n) < 0)
00870         return -1;
00871     }
00872   return 0;
00873 }
00874 
00875 /* Compute a good value for the minimum run length; natural runs shorter
00876  * than this are boosted artificially via binary insertion.
00877  *
00878  * If n < 64, return n (it's too small to bother with fancy stuff).
00879  * Else if n is an exact power of 2, return 32.
00880  * Else return an int k, 32 <= k <= 64, such that n/k is close to, but
00881  * strictly less than, an exact power of 2.
00882  *
00883  * See listsort.txt for more info.
00884  */
00885 template <class T>
00886 int
00887 octave_sort<T>::merge_compute_minrun(int n)
00888 {
00889   int r = 0;    /* becomes 1 if any 1 bits are shifted off */
00890 
00891   while (n >= 64) {
00892     r |= n & 1;
00893     n >>= 1;
00894   }
00895   return n + r;
00896 }
00897 
00898 template <class T>
00899 void
00900 octave_sort<T>::sort (T *v, int elements)
00901 {
00902   /* Re-initialize the Mergestate as this might be the second time called */
00903   ms.n = 0;
00904   ms.min_gallop = MIN_GALLOP;
00905 
00906   if (elements > 1)
00907     {
00908       int nremaining = elements; 
00909       T *lo = v;
00910       T *hi = v + elements;
00911 
00912       /* March over the array once, left to right, finding natural runs,
00913        * and extending short natural runs to minrun elements.
00914        */
00915       int minrun = merge_compute_minrun(nremaining);
00916       do 
00917         {
00918           int descending;
00919           int n;
00920 
00921           /* Identify next run. */
00922           n = count_run(lo, hi, &descending);
00923           if (n < 0)
00924             goto fail;
00925           if (descending)
00926             reverse_slice(lo, lo + n);
00927           /* If short, extend to min(minrun, nremaining). */
00928           if (n < minrun) 
00929             {
00930               const int force = nremaining <= minrun ? nremaining : minrun;
00931               binarysort(lo, lo + force, lo + n);
00932               n = force;
00933             }
00934           /* Push run onto pending-runs stack, and maybe merge. */
00935           assert(ms.n < MAX_MERGE_PENDING);
00936           ms.pending[ms.n].base = lo;
00937           ms.pending[ms.n].len = n;
00938           ++ms.n;
00939           if (merge_collapse( ) < 0)
00940             goto fail;
00941           /* Advance to find next run. */
00942           lo += n;
00943           nremaining -= n;
00944         } while (nremaining);
00945 
00946       merge_force_collapse( );
00947     }
00948 
00949 fail:
00950   return;
00951 }
00952 
00953 /*
00954 ;;; Local Variables: ***
00955 ;;; mode: C++ ***
00956 ;;; End: ***
00957 */
SEO		[PR] 爆速!無料ブログ無料ホームページ開設無料ライブ放送