Source file src/sort/sort.go

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:generate go run gen_sort_variants.go
     6  
     7  // Package sort provides primitives for sorting slices and user-defined collections.
     8  package sort
     9  
    10  import (
    11  	"math/bits"
    12  	"slices"
    13  )
    14  
    15  // An implementation of Interface can be sorted by the routines in this package.
    16  // The methods refer to elements of the underlying collection by integer index.
    17  type Interface interface {
    18  	// Len is the number of elements in the collection.
    19  	Len() int
    20  
    21  	// Less reports whether the element with index i
    22  	// must sort before the element with index j.
    23  	//
    24  	// If both Less(i, j) and Less(j, i) are false,
    25  	// then the elements at index i and j are considered equal.
    26  	// Sort may place equal elements in any order in the final result,
    27  	// while Stable preserves the original input order of equal elements.
    28  	//
    29  	// Less must describe a transitive ordering:
    30  	//  - if both Less(i, j) and Less(j, k) are true, then Less(i, k) must be true as well.
    31  	//  - if both Less(i, j) and Less(j, k) are false, then Less(i, k) must be false as well.
    32  	//
    33  	// Note that floating-point comparison (the < operator on float32 or float64 values)
    34  	// is not a transitive ordering when not-a-number (NaN) values are involved.
    35  	// See Float64Slice.Less for a correct implementation for floating-point values.
    36  	Less(i, j int) bool
    37  
    38  	// Swap swaps the elements with indexes i and j.
    39  	Swap(i, j int)
    40  }
    41  
    42  // Sort sorts data in ascending order as determined by the Less method.
    43  // It makes one call to data.Len to determine n and O(n*log(n)) calls to
    44  // data.Less and data.Swap. The sort is not guaranteed to be stable.
    45  //
    46  // Note: in many situations, the newer [slices.SortFunc] function is more
    47  // ergonomic and runs faster.
    48  func Sort(data Interface) {
    49  	n := data.Len()
    50  	if n <= 1 {
    51  		return
    52  	}
    53  	limit := bits.Len(uint(n))
    54  	pdqsort(data, 0, n, limit)
    55  }
    56  
    57  type sortedHint int // hint for pdqsort when choosing the pivot
    58  
    59  const (
    60  	unknownHint sortedHint = iota
    61  	increasingHint
    62  	decreasingHint
    63  )
    64  
    65  // xorshift paper: https://www.jstatsoft.org/article/view/v008i14/xorshift.pdf
    66  type xorshift uint64
    67  
    68  func (r *xorshift) Next() uint64 {
    69  	*r ^= *r << 13
    70  	*r ^= *r >> 7
    71  	*r ^= *r << 17
    72  	return uint64(*r)
    73  }
    74  
    75  func nextPowerOfTwo(length int) uint {
    76  	shift := uint(bits.Len(uint(length)))
    77  	return uint(1 << shift)
    78  }
    79  
    80  // lessSwap is a pair of Less and Swap function for use with the
    81  // auto-generated func-optimized variant of sort.go in
    82  // zfuncversion.go.
    83  type lessSwap struct {
    84  	Less func(i, j int) bool
    85  	Swap func(i, j int)
    86  }
    87  
    88  type reverse struct {
    89  	// This embedded Interface permits Reverse to use the methods of
    90  	// another Interface implementation.
    91  	Interface
    92  }
    93  
    94  // Less returns the opposite of the embedded implementation's Less method.
    95  func (r reverse) Less(i, j int) bool {
    96  	return r.Interface.Less(j, i)
    97  }
    98  
    99  // Reverse returns the reverse order for data.
   100  func Reverse(data Interface) Interface {
   101  	return &reverse{data}
   102  }
   103  
   104  // IsSorted reports whether data is sorted.
   105  //
   106  // Note: in many situations, the newer [slices.IsSortedFunc] function is more
   107  // ergonomic and runs faster.
   108  func IsSorted(data Interface) bool {
   109  	n := data.Len()
   110  	for i := n - 1; i > 0; i-- {
   111  		if data.Less(i, i-1) {
   112  			return false
   113  		}
   114  	}
   115  	return true
   116  }
   117  
   118  // Convenience types for common cases
   119  
   120  // IntSlice attaches the methods of Interface to []int, sorting in increasing order.
   121  type IntSlice []int
   122  
   123  func (x IntSlice) Len() int           { return len(x) }
   124  func (x IntSlice) Less(i, j int) bool { return x[i] < x[j] }
   125  func (x IntSlice) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }
   126  
   127  // Sort is a convenience method: x.Sort() calls Sort(x).
   128  func (x IntSlice) Sort() { Sort(x) }
   129  
   130  // Float64Slice implements Interface for a []float64, sorting in increasing order,
   131  // with not-a-number (NaN) values ordered before other values.
   132  type Float64Slice []float64
   133  
   134  func (x Float64Slice) Len() int { return len(x) }
   135  
   136  // Less reports whether x[i] should be ordered before x[j], as required by the sort Interface.
   137  // Note that floating-point comparison by itself is not a transitive relation: it does not
   138  // report a consistent ordering for not-a-number (NaN) values.
   139  // This implementation of Less places NaN values before any others, by using:
   140  //
   141  //	x[i] < x[j] || (math.IsNaN(x[i]) && !math.IsNaN(x[j]))
   142  func (x Float64Slice) Less(i, j int) bool { return x[i] < x[j] || (isNaN(x[i]) && !isNaN(x[j])) }
   143  func (x Float64Slice) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }
   144  
   145  // isNaN is a copy of math.IsNaN to avoid a dependency on the math package.
   146  func isNaN(f float64) bool {
   147  	return f != f
   148  }
   149  
   150  // Sort is a convenience method: x.Sort() calls Sort(x).
   151  func (x Float64Slice) Sort() { Sort(x) }
   152  
   153  // StringSlice attaches the methods of Interface to []string, sorting in increasing order.
   154  type StringSlice []string
   155  
   156  func (x StringSlice) Len() int           { return len(x) }
   157  func (x StringSlice) Less(i, j int) bool { return x[i] < x[j] }
   158  func (x StringSlice) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }
   159  
   160  // Sort is a convenience method: x.Sort() calls Sort(x).
   161  func (x StringSlice) Sort() { Sort(x) }
   162  
   163  // Convenience wrappers for common cases
   164  
   165  // Ints sorts a slice of ints in increasing order.
   166  //
   167  // Note: as of Go 1.22, this function simply calls [slices.Sort].
   168  func Ints(x []int) { slices.Sort(x) }
   169  
   170  // Float64s sorts a slice of float64s in increasing order.
   171  // Not-a-number (NaN) values are ordered before other values.
   172  //
   173  // Note: as of Go 1.22, this function simply calls [slices.Sort].
   174  func Float64s(x []float64) { slices.Sort(x) }
   175  
   176  // Strings sorts a slice of strings in increasing order.
   177  //
   178  // Note: as of Go 1.22, this function simply calls [slices.Sort].
   179  func Strings(x []string) { slices.Sort(x) }
   180  
   181  // IntsAreSorted reports whether the slice x is sorted in increasing order.
   182  //
   183  // Note: as of Go 1.22, this function simply calls [slices.IsSorted].
   184  func IntsAreSorted(x []int) bool { return slices.IsSorted(x) }
   185  
   186  // Float64sAreSorted reports whether the slice x is sorted in increasing order,
   187  // with not-a-number (NaN) values before any other values.
   188  //
   189  // Note: as of Go 1.22, this function simply calls [slices.IsSorted].
   190  func Float64sAreSorted(x []float64) bool { return slices.IsSorted(x) }
   191  
   192  // StringsAreSorted reports whether the slice x is sorted in increasing order.
   193  //
   194  // Note: as of Go 1.22, this function simply calls [slices.IsSorted].
   195  func StringsAreSorted(x []string) bool { return slices.IsSorted(x) }
   196  
   197  // Notes on stable sorting:
   198  // The used algorithms are simple and provable correct on all input and use
   199  // only logarithmic additional stack space. They perform well if compared
   200  // experimentally to other stable in-place sorting algorithms.
   201  //
   202  // Remarks on other algorithms evaluated:
   203  //  - GCC's 4.6.3 stable_sort with merge_without_buffer from libstdc++:
   204  //    Not faster.
   205  //  - GCC's __rotate for block rotations: Not faster.
   206  //  - "Practical in-place mergesort" from  Jyrki Katajainen, Tomi A. Pasanen
   207  //    and Jukka Teuhola; Nordic Journal of Computing 3,1 (1996), 27-40:
   208  //    The given algorithms are in-place, number of Swap and Assignments
   209  //    grow as n log n but the algorithm is not stable.
   210  //  - "Fast Stable In-Place Sorting with O(n) Data Moves" J.I. Munro and
   211  //    V. Raman in Algorithmica (1996) 16, 115-160:
   212  //    This algorithm either needs additional 2n bits or works only if there
   213  //    are enough different elements available to encode some permutations
   214  //    which have to be undone later (so not stable on any input).
   215  //  - All the optimal in-place sorting/merging algorithms I found are either
   216  //    unstable or rely on enough different elements in each step to encode the
   217  //    performed block rearrangements. See also "In-Place Merging Algorithms",
   218  //    Denham Coates-Evely, Department of Computer Science, Kings College,
   219  //    January 2004 and the references in there.
   220  //  - Often "optimal" algorithms are optimal in the number of assignments
   221  //    but Interface has only Swap as operation.
   222  
   223  // Stable sorts data in ascending order as determined by the Less method,
   224  // while keeping the original order of equal elements.
   225  //
   226  // It makes one call to data.Len to determine n, O(n*log(n)) calls to
   227  // data.Less and O(n*log(n)*log(n)) calls to data.Swap.
   228  //
   229  // Note: in many situations, the newer slices.SortStableFunc function is more
   230  // ergonomic and runs faster.
   231  func Stable(data Interface) {
   232  	stable(data, data.Len())
   233  }
   234  
   235  /*
   236  Complexity of Stable Sorting
   237  
   238  
   239  Complexity of block swapping rotation
   240  
   241  Each Swap puts one new element into its correct, final position.
   242  Elements which reach their final position are no longer moved.
   243  Thus block swapping rotation needs |u|+|v| calls to Swaps.
   244  This is best possible as each element might need a move.
   245  
   246  Pay attention when comparing to other optimal algorithms which
   247  typically count the number of assignments instead of swaps:
   248  E.g. the optimal algorithm of Dudzinski and Dydek for in-place
   249  rotations uses O(u + v + gcd(u,v)) assignments which is
   250  better than our O(3 * (u+v)) as gcd(u,v) <= u.
   251  
   252  
   253  Stable sorting by SymMerge and BlockSwap rotations
   254  
   255  SymMerg complexity for same size input M = N:
   256  Calls to Less:  O(M*log(N/M+1)) = O(N*log(2)) = O(N)
   257  Calls to Swap:  O((M+N)*log(M)) = O(2*N*log(N)) = O(N*log(N))
   258  
   259  (The following argument does not fuzz over a missing -1 or
   260  other stuff which does not impact the final result).
   261  
   262  Let n = data.Len(). Assume n = 2^k.
   263  
   264  Plain merge sort performs log(n) = k iterations.
   265  On iteration i the algorithm merges 2^(k-i) blocks, each of size 2^i.
   266  
   267  Thus iteration i of merge sort performs:
   268  Calls to Less  O(2^(k-i) * 2^i) = O(2^k) = O(2^log(n)) = O(n)
   269  Calls to Swap  O(2^(k-i) * 2^i * log(2^i)) = O(2^k * i) = O(n*i)
   270  
   271  In total k = log(n) iterations are performed; so in total:
   272  Calls to Less O(log(n) * n)
   273  Calls to Swap O(n + 2*n + 3*n + ... + (k-1)*n + k*n)
   274     = O((k/2) * k * n) = O(n * k^2) = O(n * log^2(n))
   275  
   276  
   277  Above results should generalize to arbitrary n = 2^k + p
   278  and should not be influenced by the initial insertion sort phase:
   279  Insertion sort is O(n^2) on Swap and Less, thus O(bs^2) per block of
   280  size bs at n/bs blocks:  O(bs*n) Swaps and Less during insertion sort.
   281  Merge sort iterations start at i = log(bs). With t = log(bs) constant:
   282  Calls to Less O((log(n)-t) * n + bs*n) = O(log(n)*n + (bs-t)*n)
   283     = O(n * log(n))
   284  Calls to Swap O(n * log^2(n) - (t^2+t)/2*n) = O(n * log^2(n))
   285  
   286  */
   287  

View as plain text