Source file src/sort/sort.go

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:generate go run gen_sort_variants.go
     6  
     7  // Package sort provides primitives for sorting slices and user-defined collections.
     8  package sort
     9  
    10  import (
    11  	"math/bits"
    12  	"slices"
    13  )
    14  
    15  // An implementation of Interface can be sorted by the routines in this package.
    16  // The methods refer to elements of the underlying collection by integer index.
    17  type Interface interface {
    18  	// Len is the number of elements in the collection.
    19  	Len() int
    20  
    21  	// Less reports whether the element with index i
    22  	// must sort before the element with index j.
    23  	//
    24  	// If both Less(i, j) and Less(j, i) are false,
    25  	// then the elements at index i and j are considered equal.
    26  	// Sort may place equal elements in any order in the final result,
    27  	// while Stable preserves the original input order of equal elements.
    28  	//
    29  	// Less must describe a [Strict Weak Ordering]. For example:
    30  	//  - if both Less(i, j) and Less(j, k) are true, then Less(i, k) must be true as well.
    31  	//  - if both Less(i, j) and Less(j, k) are false, then Less(i, k) must be false as well.
    32  	//
    33  	// Note that floating-point comparison (the < operator on float32 or float64 values)
    34  	// is not a strict weak ordering when not-a-number (NaN) values are involved.
    35  	// See Float64Slice.Less for a correct implementation for floating-point values.
    36  	//
    37  	// [Strict Weak Ordering]: https://en.wikipedia.org/wiki/Weak_ordering#Strict_weak_orderings
    38  	Less(i, j int) bool
    39  
    40  	// Swap swaps the elements with indexes i and j.
    41  	Swap(i, j int)
    42  }
    43  
    44  // Sort sorts data in ascending order as determined by the Less method.
    45  // It makes one call to data.Len to determine n and O(n*log(n)) calls to
    46  // data.Less and data.Swap. The sort is not guaranteed to be stable.
    47  //
    48  // Note: in many situations, the newer [slices.SortFunc] function is more
    49  // ergonomic and runs faster.
    50  func Sort(data Interface) {
    51  	n := data.Len()
    52  	if n <= 1 {
    53  		return
    54  	}
    55  	limit := bits.Len(uint(n))
    56  	pdqsort(data, 0, n, limit)
    57  }
    58  
    59  type sortedHint int // hint for pdqsort when choosing the pivot
    60  
    61  const (
    62  	unknownHint sortedHint = iota
    63  	increasingHint
    64  	decreasingHint
    65  )
    66  
    67  // xorshift paper: https://www.jstatsoft.org/article/view/v008i14/xorshift.pdf
    68  type xorshift uint64
    69  
    70  func (r *xorshift) Next() uint64 {
    71  	*r ^= *r << 13
    72  	*r ^= *r >> 7
    73  	*r ^= *r << 17
    74  	return uint64(*r)
    75  }
    76  
    77  func nextPowerOfTwo(length int) uint {
    78  	shift := uint(bits.Len(uint(length)))
    79  	return uint(1 << shift)
    80  }
    81  
    82  // lessSwap is a pair of Less and Swap function for use with the
    83  // auto-generated func-optimized variant of sort.go in
    84  // zfuncversion.go.
    85  type lessSwap struct {
    86  	Less func(i, j int) bool
    87  	Swap func(i, j int)
    88  }
    89  
    90  type reverse struct {
    91  	// This embedded Interface permits Reverse to use the methods of
    92  	// another Interface implementation.
    93  	Interface
    94  }
    95  
    96  // Less returns the opposite of the embedded implementation's Less method.
    97  func (r reverse) Less(i, j int) bool {
    98  	return r.Interface.Less(j, i)
    99  }
   100  
   101  // Reverse returns the reverse order for data.
   102  func Reverse(data Interface) Interface {
   103  	return &reverse{data}
   104  }
   105  
   106  // IsSorted reports whether data is sorted.
   107  //
   108  // Note: in many situations, the newer [slices.IsSortedFunc] function is more
   109  // ergonomic and runs faster.
   110  func IsSorted(data Interface) bool {
   111  	n := data.Len()
   112  	for i := n - 1; i > 0; i-- {
   113  		if data.Less(i, i-1) {
   114  			return false
   115  		}
   116  	}
   117  	return true
   118  }
   119  
   120  // Convenience types for common cases
   121  
   122  // IntSlice attaches the methods of Interface to []int, sorting in increasing order.
   123  type IntSlice []int
   124  
   125  func (x IntSlice) Len() int           { return len(x) }
   126  func (x IntSlice) Less(i, j int) bool { return x[i] < x[j] }
   127  func (x IntSlice) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }
   128  
   129  // Sort is a convenience method: x.Sort() calls Sort(x).
   130  func (x IntSlice) Sort() { Sort(x) }
   131  
   132  // Float64Slice implements Interface for a []float64, sorting in increasing order,
   133  // with not-a-number (NaN) values ordered before other values.
   134  type Float64Slice []float64
   135  
   136  func (x Float64Slice) Len() int { return len(x) }
   137  
   138  // Less reports whether x[i] should be ordered before x[j], as required by the sort Interface.
   139  // Note that floating-point comparison by itself is not a transitive relation: it does not
   140  // report a consistent ordering for not-a-number (NaN) values.
   141  // This implementation of Less places NaN values before any others, by using:
   142  //
   143  //	x[i] < x[j] || (math.IsNaN(x[i]) && !math.IsNaN(x[j]))
   144  func (x Float64Slice) Less(i, j int) bool { return x[i] < x[j] || (isNaN(x[i]) && !isNaN(x[j])) }
   145  func (x Float64Slice) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }
   146  
   147  // isNaN is a copy of math.IsNaN to avoid a dependency on the math package.
   148  func isNaN(f float64) bool {
   149  	return f != f
   150  }
   151  
   152  // Sort is a convenience method: x.Sort() calls Sort(x).
   153  func (x Float64Slice) Sort() { Sort(x) }
   154  
   155  // StringSlice attaches the methods of Interface to []string, sorting in increasing order.
   156  type StringSlice []string
   157  
   158  func (x StringSlice) Len() int           { return len(x) }
   159  func (x StringSlice) Less(i, j int) bool { return x[i] < x[j] }
   160  func (x StringSlice) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }
   161  
   162  // Sort is a convenience method: x.Sort() calls Sort(x).
   163  func (x StringSlice) Sort() { Sort(x) }
   164  
   165  // Convenience wrappers for common cases
   166  
   167  // Ints sorts a slice of ints in increasing order.
   168  //
   169  // Note: as of Go 1.22, this function simply calls [slices.Sort].
   170  func Ints(x []int) { slices.Sort(x) }
   171  
   172  // Float64s sorts a slice of float64s in increasing order.
   173  // Not-a-number (NaN) values are ordered before other values.
   174  //
   175  // Note: as of Go 1.22, this function simply calls [slices.Sort].
   176  func Float64s(x []float64) { slices.Sort(x) }
   177  
   178  // Strings sorts a slice of strings in increasing order.
   179  //
   180  // Note: as of Go 1.22, this function simply calls [slices.Sort].
   181  func Strings(x []string) { slices.Sort(x) }
   182  
   183  // IntsAreSorted reports whether the slice x is sorted in increasing order.
   184  //
   185  // Note: as of Go 1.22, this function simply calls [slices.IsSorted].
   186  func IntsAreSorted(x []int) bool { return slices.IsSorted(x) }
   187  
   188  // Float64sAreSorted reports whether the slice x is sorted in increasing order,
   189  // with not-a-number (NaN) values before any other values.
   190  //
   191  // Note: as of Go 1.22, this function simply calls [slices.IsSorted].
   192  func Float64sAreSorted(x []float64) bool { return slices.IsSorted(x) }
   193  
   194  // StringsAreSorted reports whether the slice x is sorted in increasing order.
   195  //
   196  // Note: as of Go 1.22, this function simply calls [slices.IsSorted].
   197  func StringsAreSorted(x []string) bool { return slices.IsSorted(x) }
   198  
   199  // Notes on stable sorting:
   200  // The used algorithms are simple and provable correct on all input and use
   201  // only logarithmic additional stack space. They perform well if compared
   202  // experimentally to other stable in-place sorting algorithms.
   203  //
   204  // Remarks on other algorithms evaluated:
   205  //  - GCC's 4.6.3 stable_sort with merge_without_buffer from libstdc++:
   206  //    Not faster.
   207  //  - GCC's __rotate for block rotations: Not faster.
   208  //  - "Practical in-place mergesort" from  Jyrki Katajainen, Tomi A. Pasanen
   209  //    and Jukka Teuhola; Nordic Journal of Computing 3,1 (1996), 27-40:
   210  //    The given algorithms are in-place, number of Swap and Assignments
   211  //    grow as n log n but the algorithm is not stable.
   212  //  - "Fast Stable In-Place Sorting with O(n) Data Moves" J.I. Munro and
   213  //    V. Raman in Algorithmica (1996) 16, 115-160:
   214  //    This algorithm either needs additional 2n bits or works only if there
   215  //    are enough different elements available to encode some permutations
   216  //    which have to be undone later (so not stable on any input).
   217  //  - All the optimal in-place sorting/merging algorithms I found are either
   218  //    unstable or rely on enough different elements in each step to encode the
   219  //    performed block rearrangements. See also "In-Place Merging Algorithms",
   220  //    Denham Coates-Evely, Department of Computer Science, Kings College,
   221  //    January 2004 and the references in there.
   222  //  - Often "optimal" algorithms are optimal in the number of assignments
   223  //    but Interface has only Swap as operation.
   224  
   225  // Stable sorts data in ascending order as determined by the Less method,
   226  // while keeping the original order of equal elements.
   227  //
   228  // It makes one call to data.Len to determine n, O(n*log(n)) calls to
   229  // data.Less and O(n*log(n)*log(n)) calls to data.Swap.
   230  //
   231  // Note: in many situations, the newer slices.SortStableFunc function is more
   232  // ergonomic and runs faster.
   233  func Stable(data Interface) {
   234  	stable(data, data.Len())
   235  }
   236  
   237  /*
   238  Complexity of Stable Sorting
   239  
   240  
   241  Complexity of block swapping rotation
   242  
   243  Each Swap puts one new element into its correct, final position.
   244  Elements which reach their final position are no longer moved.
   245  Thus block swapping rotation needs |u|+|v| calls to Swaps.
   246  This is best possible as each element might need a move.
   247  
   248  Pay attention when comparing to other optimal algorithms which
   249  typically count the number of assignments instead of swaps:
   250  E.g. the optimal algorithm of Dudzinski and Dydek for in-place
   251  rotations uses O(u + v + gcd(u,v)) assignments which is
   252  better than our O(3 * (u+v)) as gcd(u,v) <= u.
   253  
   254  
   255  Stable sorting by SymMerge and BlockSwap rotations
   256  
   257  SymMerg complexity for same size input M = N:
   258  Calls to Less:  O(M*log(N/M+1)) = O(N*log(2)) = O(N)
   259  Calls to Swap:  O((M+N)*log(M)) = O(2*N*log(N)) = O(N*log(N))
   260  
   261  (The following argument does not fuzz over a missing -1 or
   262  other stuff which does not impact the final result).
   263  
   264  Let n = data.Len(). Assume n = 2^k.
   265  
   266  Plain merge sort performs log(n) = k iterations.
   267  On iteration i the algorithm merges 2^(k-i) blocks, each of size 2^i.
   268  
   269  Thus iteration i of merge sort performs:
   270  Calls to Less  O(2^(k-i) * 2^i) = O(2^k) = O(2^log(n)) = O(n)
   271  Calls to Swap  O(2^(k-i) * 2^i * log(2^i)) = O(2^k * i) = O(n*i)
   272  
   273  In total k = log(n) iterations are performed; so in total:
   274  Calls to Less O(log(n) * n)
   275  Calls to Swap O(n + 2*n + 3*n + ... + (k-1)*n + k*n)
   276     = O((k/2) * k * n) = O(n * k^2) = O(n * log^2(n))
   277  
   278  
   279  Above results should generalize to arbitrary n = 2^k + p
   280  and should not be influenced by the initial insertion sort phase:
   281  Insertion sort is O(n^2) on Swap and Less, thus O(bs^2) per block of
   282  size bs at n/bs blocks:  O(bs*n) Swaps and Less during insertion sort.
   283  Merge sort iterations start at i = log(bs). With t = log(bs) constant:
   284  Calls to Less O((log(n)-t) * n + bs*n) = O(log(n)*n + (bs-t)*n)
   285     = O(n * log(n))
   286  Calls to Swap O(n * log^2(n) - (t^2+t)/2*n) = O(n * log^2(n))
   287  
   288  */
   289  

View as plain text