Source file src/sort/sort.go
1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 //go:generate go run gen_sort_variants.go 6 7 // Package sort provides primitives for sorting slices and user-defined collections. 8 package sort 9 10 import ( 11 "math/bits" 12 "slices" 13 ) 14 15 // An implementation of Interface can be sorted by the routines in this package. 16 // The methods refer to elements of the underlying collection by integer index. 17 type Interface interface { 18 // Len is the number of elements in the collection. 19 Len() int 20 21 // Less reports whether the element with index i 22 // must sort before the element with index j. 23 // 24 // If both Less(i, j) and Less(j, i) are false, 25 // then the elements at index i and j are considered equal. 26 // Sort may place equal elements in any order in the final result, 27 // while Stable preserves the original input order of equal elements. 28 // 29 // Less must describe a [Strict Weak Ordering]. For example: 30 // - if both Less(i, j) and Less(j, k) are true, then Less(i, k) must be true as well. 31 // - if both Less(i, j) and Less(j, k) are false, then Less(i, k) must be false as well. 32 // 33 // Note that floating-point comparison (the < operator on float32 or float64 values) 34 // is not a strict weak ordering when not-a-number (NaN) values are involved. 35 // See Float64Slice.Less for a correct implementation for floating-point values. 36 // 37 // [Strict Weak Ordering]: https://en.wikipedia.org/wiki/Weak_ordering#Strict_weak_orderings 38 Less(i, j int) bool 39 40 // Swap swaps the elements with indexes i and j. 41 Swap(i, j int) 42 } 43 44 // Sort sorts data in ascending order as determined by the Less method. 45 // It makes one call to data.Len to determine n and O(n*log(n)) calls to 46 // data.Less and data.Swap. The sort is not guaranteed to be stable. 47 // 48 // Note: in many situations, the newer [slices.SortFunc] function is more 49 // ergonomic and runs faster. 50 func Sort(data Interface) { 51 n := data.Len() 52 if n <= 1 { 53 return 54 } 55 limit := bits.Len(uint(n)) 56 pdqsort(data, 0, n, limit) 57 } 58 59 type sortedHint int // hint for pdqsort when choosing the pivot 60 61 const ( 62 unknownHint sortedHint = iota 63 increasingHint 64 decreasingHint 65 ) 66 67 // xorshift paper: https://www.jstatsoft.org/article/view/v008i14/xorshift.pdf 68 type xorshift uint64 69 70 func (r *xorshift) Next() uint64 { 71 *r ^= *r << 13 72 *r ^= *r >> 7 73 *r ^= *r << 17 74 return uint64(*r) 75 } 76 77 func nextPowerOfTwo(length int) uint { 78 shift := uint(bits.Len(uint(length))) 79 return uint(1 << shift) 80 } 81 82 // lessSwap is a pair of Less and Swap function for use with the 83 // auto-generated func-optimized variant of sort.go in 84 // zfuncversion.go. 85 type lessSwap struct { 86 Less func(i, j int) bool 87 Swap func(i, j int) 88 } 89 90 type reverse struct { 91 // This embedded Interface permits Reverse to use the methods of 92 // another Interface implementation. 93 Interface 94 } 95 96 // Less returns the opposite of the embedded implementation's Less method. 97 func (r reverse) Less(i, j int) bool { 98 return r.Interface.Less(j, i) 99 } 100 101 // Reverse returns the reverse order for data. 102 func Reverse(data Interface) Interface { 103 return &reverse{data} 104 } 105 106 // IsSorted reports whether data is sorted. 107 // 108 // Note: in many situations, the newer [slices.IsSortedFunc] function is more 109 // ergonomic and runs faster. 110 func IsSorted(data Interface) bool { 111 n := data.Len() 112 for i := n - 1; i > 0; i-- { 113 if data.Less(i, i-1) { 114 return false 115 } 116 } 117 return true 118 } 119 120 // Convenience types for common cases 121 122 // IntSlice attaches the methods of Interface to []int, sorting in increasing order. 123 type IntSlice []int 124 125 func (x IntSlice) Len() int { return len(x) } 126 func (x IntSlice) Less(i, j int) bool { return x[i] < x[j] } 127 func (x IntSlice) Swap(i, j int) { x[i], x[j] = x[j], x[i] } 128 129 // Sort is a convenience method: x.Sort() calls Sort(x). 130 func (x IntSlice) Sort() { Sort(x) } 131 132 // Float64Slice implements Interface for a []float64, sorting in increasing order, 133 // with not-a-number (NaN) values ordered before other values. 134 type Float64Slice []float64 135 136 func (x Float64Slice) Len() int { return len(x) } 137 138 // Less reports whether x[i] should be ordered before x[j], as required by the sort Interface. 139 // Note that floating-point comparison by itself is not a transitive relation: it does not 140 // report a consistent ordering for not-a-number (NaN) values. 141 // This implementation of Less places NaN values before any others, by using: 142 // 143 // x[i] < x[j] || (math.IsNaN(x[i]) && !math.IsNaN(x[j])) 144 func (x Float64Slice) Less(i, j int) bool { return x[i] < x[j] || (isNaN(x[i]) && !isNaN(x[j])) } 145 func (x Float64Slice) Swap(i, j int) { x[i], x[j] = x[j], x[i] } 146 147 // isNaN is a copy of math.IsNaN to avoid a dependency on the math package. 148 func isNaN(f float64) bool { 149 return f != f 150 } 151 152 // Sort is a convenience method: x.Sort() calls Sort(x). 153 func (x Float64Slice) Sort() { Sort(x) } 154 155 // StringSlice attaches the methods of Interface to []string, sorting in increasing order. 156 type StringSlice []string 157 158 func (x StringSlice) Len() int { return len(x) } 159 func (x StringSlice) Less(i, j int) bool { return x[i] < x[j] } 160 func (x StringSlice) Swap(i, j int) { x[i], x[j] = x[j], x[i] } 161 162 // Sort is a convenience method: x.Sort() calls Sort(x). 163 func (x StringSlice) Sort() { Sort(x) } 164 165 // Convenience wrappers for common cases 166 167 // Ints sorts a slice of ints in increasing order. 168 // 169 // Note: as of Go 1.22, this function simply calls [slices.Sort]. 170 func Ints(x []int) { slices.Sort(x) } 171 172 // Float64s sorts a slice of float64s in increasing order. 173 // Not-a-number (NaN) values are ordered before other values. 174 // 175 // Note: as of Go 1.22, this function simply calls [slices.Sort]. 176 func Float64s(x []float64) { slices.Sort(x) } 177 178 // Strings sorts a slice of strings in increasing order. 179 // 180 // Note: as of Go 1.22, this function simply calls [slices.Sort]. 181 func Strings(x []string) { slices.Sort(x) } 182 183 // IntsAreSorted reports whether the slice x is sorted in increasing order. 184 // 185 // Note: as of Go 1.22, this function simply calls [slices.IsSorted]. 186 func IntsAreSorted(x []int) bool { return slices.IsSorted(x) } 187 188 // Float64sAreSorted reports whether the slice x is sorted in increasing order, 189 // with not-a-number (NaN) values before any other values. 190 // 191 // Note: as of Go 1.22, this function simply calls [slices.IsSorted]. 192 func Float64sAreSorted(x []float64) bool { return slices.IsSorted(x) } 193 194 // StringsAreSorted reports whether the slice x is sorted in increasing order. 195 // 196 // Note: as of Go 1.22, this function simply calls [slices.IsSorted]. 197 func StringsAreSorted(x []string) bool { return slices.IsSorted(x) } 198 199 // Notes on stable sorting: 200 // The used algorithms are simple and provable correct on all input and use 201 // only logarithmic additional stack space. They perform well if compared 202 // experimentally to other stable in-place sorting algorithms. 203 // 204 // Remarks on other algorithms evaluated: 205 // - GCC's 4.6.3 stable_sort with merge_without_buffer from libstdc++: 206 // Not faster. 207 // - GCC's __rotate for block rotations: Not faster. 208 // - "Practical in-place mergesort" from Jyrki Katajainen, Tomi A. Pasanen 209 // and Jukka Teuhola; Nordic Journal of Computing 3,1 (1996), 27-40: 210 // The given algorithms are in-place, number of Swap and Assignments 211 // grow as n log n but the algorithm is not stable. 212 // - "Fast Stable In-Place Sorting with O(n) Data Moves" J.I. Munro and 213 // V. Raman in Algorithmica (1996) 16, 115-160: 214 // This algorithm either needs additional 2n bits or works only if there 215 // are enough different elements available to encode some permutations 216 // which have to be undone later (so not stable on any input). 217 // - All the optimal in-place sorting/merging algorithms I found are either 218 // unstable or rely on enough different elements in each step to encode the 219 // performed block rearrangements. See also "In-Place Merging Algorithms", 220 // Denham Coates-Evely, Department of Computer Science, Kings College, 221 // January 2004 and the references in there. 222 // - Often "optimal" algorithms are optimal in the number of assignments 223 // but Interface has only Swap as operation. 224 225 // Stable sorts data in ascending order as determined by the Less method, 226 // while keeping the original order of equal elements. 227 // 228 // It makes one call to data.Len to determine n, O(n*log(n)) calls to 229 // data.Less and O(n*log(n)*log(n)) calls to data.Swap. 230 // 231 // Note: in many situations, the newer slices.SortStableFunc function is more 232 // ergonomic and runs faster. 233 func Stable(data Interface) { 234 stable(data, data.Len()) 235 } 236 237 /* 238 Complexity of Stable Sorting 239 240 241 Complexity of block swapping rotation 242 243 Each Swap puts one new element into its correct, final position. 244 Elements which reach their final position are no longer moved. 245 Thus block swapping rotation needs |u|+|v| calls to Swaps. 246 This is best possible as each element might need a move. 247 248 Pay attention when comparing to other optimal algorithms which 249 typically count the number of assignments instead of swaps: 250 E.g. the optimal algorithm of Dudzinski and Dydek for in-place 251 rotations uses O(u + v + gcd(u,v)) assignments which is 252 better than our O(3 * (u+v)) as gcd(u,v) <= u. 253 254 255 Stable sorting by SymMerge and BlockSwap rotations 256 257 SymMerg complexity for same size input M = N: 258 Calls to Less: O(M*log(N/M+1)) = O(N*log(2)) = O(N) 259 Calls to Swap: O((M+N)*log(M)) = O(2*N*log(N)) = O(N*log(N)) 260 261 (The following argument does not fuzz over a missing -1 or 262 other stuff which does not impact the final result). 263 264 Let n = data.Len(). Assume n = 2^k. 265 266 Plain merge sort performs log(n) = k iterations. 267 On iteration i the algorithm merges 2^(k-i) blocks, each of size 2^i. 268 269 Thus iteration i of merge sort performs: 270 Calls to Less O(2^(k-i) * 2^i) = O(2^k) = O(2^log(n)) = O(n) 271 Calls to Swap O(2^(k-i) * 2^i * log(2^i)) = O(2^k * i) = O(n*i) 272 273 In total k = log(n) iterations are performed; so in total: 274 Calls to Less O(log(n) * n) 275 Calls to Swap O(n + 2*n + 3*n + ... + (k-1)*n + k*n) 276 = O((k/2) * k * n) = O(n * k^2) = O(n * log^2(n)) 277 278 279 Above results should generalize to arbitrary n = 2^k + p 280 and should not be influenced by the initial insertion sort phase: 281 Insertion sort is O(n^2) on Swap and Less, thus O(bs^2) per block of 282 size bs at n/bs blocks: O(bs*n) Swaps and Less during insertion sort. 283 Merge sort iterations start at i = log(bs). With t = log(bs) constant: 284 Calls to Less O((log(n)-t) * n + bs*n) = O(log(n)*n + (bs-t)*n) 285 = O(n * log(n)) 286 Calls to Swap O(n * log^2(n) - (t^2+t)/2*n) = O(n * log^2(n)) 287 288 */ 289