Source file src/sort/sort.go
1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 //go:generate go run gen_sort_variants.go 6 7 // Package sort provides primitives for sorting slices and user-defined collections. 8 package sort 9 10 import "math/bits" 11 12 // An implementation of Interface can be sorted by the routines in this package. 13 // The methods refer to elements of the underlying collection by integer index. 14 type Interface interface { 15 // Len is the number of elements in the collection. 16 Len() int 17 18 // Less reports whether the element with index i 19 // must sort before the element with index j. 20 // 21 // If both Less(i, j) and Less(j, i) are false, 22 // then the elements at index i and j are considered equal. 23 // Sort may place equal elements in any order in the final result, 24 // while Stable preserves the original input order of equal elements. 25 // 26 // Less must describe a transitive ordering: 27 // - if both Less(i, j) and Less(j, k) are true, then Less(i, k) must be true as well. 28 // - if both Less(i, j) and Less(j, k) are false, then Less(i, k) must be false as well. 29 // 30 // Note that floating-point comparison (the < operator on float32 or float64 values) 31 // is not a transitive ordering when not-a-number (NaN) values are involved. 32 // See Float64Slice.Less for a correct implementation for floating-point values. 33 Less(i, j int) bool 34 35 // Swap swaps the elements with indexes i and j. 36 Swap(i, j int) 37 } 38 39 // Sort sorts data in ascending order as determined by the Less method. 40 // It makes one call to data.Len to determine n and O(n*log(n)) calls to 41 // data.Less and data.Swap. The sort is not guaranteed to be stable. 42 // 43 // Note: in many situations, the newer [slices.SortFunc] function is more 44 // ergonomic and runs faster. 45 func Sort(data Interface) { 46 n := data.Len() 47 if n <= 1 { 48 return 49 } 50 limit := bits.Len(uint(n)) 51 pdqsort(data, 0, n, limit) 52 } 53 54 type sortedHint int // hint for pdqsort when choosing the pivot 55 56 const ( 57 unknownHint sortedHint = iota 58 increasingHint 59 decreasingHint 60 ) 61 62 // xorshift paper: https://www.jstatsoft.org/article/view/v008i14/xorshift.pdf 63 type xorshift uint64 64 65 func (r *xorshift) Next() uint64 { 66 *r ^= *r << 13 67 *r ^= *r >> 17 68 *r ^= *r << 5 69 return uint64(*r) 70 } 71 72 func nextPowerOfTwo(length int) uint { 73 shift := uint(bits.Len(uint(length))) 74 return uint(1 << shift) 75 } 76 77 // lessSwap is a pair of Less and Swap function for use with the 78 // auto-generated func-optimized variant of sort.go in 79 // zfuncversion.go. 80 type lessSwap struct { 81 Less func(i, j int) bool 82 Swap func(i, j int) 83 } 84 85 type reverse struct { 86 // This embedded Interface permits Reverse to use the methods of 87 // another Interface implementation. 88 Interface 89 } 90 91 // Less returns the opposite of the embedded implementation's Less method. 92 func (r reverse) Less(i, j int) bool { 93 return r.Interface.Less(j, i) 94 } 95 96 // Reverse returns the reverse order for data. 97 func Reverse(data Interface) Interface { 98 return &reverse{data} 99 } 100 101 // IsSorted reports whether data is sorted. 102 // 103 // Note: in many situations, the newer [slices.IsSortedFunc] function is more 104 // ergonomic and runs faster. 105 func IsSorted(data Interface) bool { 106 n := data.Len() 107 for i := n - 1; i > 0; i-- { 108 if data.Less(i, i-1) { 109 return false 110 } 111 } 112 return true 113 } 114 115 // Convenience types for common cases 116 117 // IntSlice attaches the methods of Interface to []int, sorting in increasing order. 118 type IntSlice []int 119 120 func (x IntSlice) Len() int { return len(x) } 121 func (x IntSlice) Less(i, j int) bool { return x[i] < x[j] } 122 func (x IntSlice) Swap(i, j int) { x[i], x[j] = x[j], x[i] } 123 124 // Sort is a convenience method: x.Sort() calls Sort(x). 125 func (x IntSlice) Sort() { Sort(x) } 126 127 // Float64Slice implements Interface for a []float64, sorting in increasing order, 128 // with not-a-number (NaN) values ordered before other values. 129 type Float64Slice []float64 130 131 func (x Float64Slice) Len() int { return len(x) } 132 133 // Less reports whether x[i] should be ordered before x[j], as required by the sort Interface. 134 // Note that floating-point comparison by itself is not a transitive relation: it does not 135 // report a consistent ordering for not-a-number (NaN) values. 136 // This implementation of Less places NaN values before any others, by using: 137 // 138 // x[i] < x[j] || (math.IsNaN(x[i]) && !math.IsNaN(x[j])) 139 func (x Float64Slice) Less(i, j int) bool { return x[i] < x[j] || (isNaN(x[i]) && !isNaN(x[j])) } 140 func (x Float64Slice) Swap(i, j int) { x[i], x[j] = x[j], x[i] } 141 142 // isNaN is a copy of math.IsNaN to avoid a dependency on the math package. 143 func isNaN(f float64) bool { 144 return f != f 145 } 146 147 // Sort is a convenience method: x.Sort() calls Sort(x). 148 func (x Float64Slice) Sort() { Sort(x) } 149 150 // StringSlice attaches the methods of Interface to []string, sorting in increasing order. 151 type StringSlice []string 152 153 func (x StringSlice) Len() int { return len(x) } 154 func (x StringSlice) Less(i, j int) bool { return x[i] < x[j] } 155 func (x StringSlice) Swap(i, j int) { x[i], x[j] = x[j], x[i] } 156 157 // Sort is a convenience method: x.Sort() calls Sort(x). 158 func (x StringSlice) Sort() { Sort(x) } 159 160 // Convenience wrappers for common cases 161 162 // Ints sorts a slice of ints in increasing order. 163 // 164 // Note: as of Go 1.22, this function simply calls [slices.Sort]. 165 func Ints(x []int) { intsImpl(x) } 166 167 // Float64s sorts a slice of float64s in increasing order. 168 // Not-a-number (NaN) values are ordered before other values. 169 // 170 // Note: as of Go 1.22, this function simply calls [slices.Sort]. 171 func Float64s(x []float64) { float64sImpl(x) } 172 173 // Strings sorts a slice of strings in increasing order. 174 // 175 // Note: as of Go 1.22, this function simply calls [slices.Sort]. 176 func Strings(x []string) { stringsImpl(x) } 177 178 // IntsAreSorted reports whether the slice x is sorted in increasing order. 179 // 180 // Note: as of Go 1.22, this function simply calls [slices.IsSorted]. 181 func IntsAreSorted(x []int) bool { return intsAreSortedImpl(x) } 182 183 // Float64sAreSorted reports whether the slice x is sorted in increasing order, 184 // with not-a-number (NaN) values before any other values. 185 // 186 // Note: as of Go 1.22, this function simply calls [slices.IsSorted]. 187 func Float64sAreSorted(x []float64) bool { return float64sAreSortedImpl(x) } 188 189 // StringsAreSorted reports whether the slice x is sorted in increasing order. 190 // 191 // Note: as of Go 1.22, this function simply calls [slices.IsSorted]. 192 func StringsAreSorted(x []string) bool { return stringsAreSortedImpl(x) } 193 194 // Notes on stable sorting: 195 // The used algorithms are simple and provable correct on all input and use 196 // only logarithmic additional stack space. They perform well if compared 197 // experimentally to other stable in-place sorting algorithms. 198 // 199 // Remarks on other algorithms evaluated: 200 // - GCC's 4.6.3 stable_sort with merge_without_buffer from libstdc++: 201 // Not faster. 202 // - GCC's __rotate for block rotations: Not faster. 203 // - "Practical in-place mergesort" from Jyrki Katajainen, Tomi A. Pasanen 204 // and Jukka Teuhola; Nordic Journal of Computing 3,1 (1996), 27-40: 205 // The given algorithms are in-place, number of Swap and Assignments 206 // grow as n log n but the algorithm is not stable. 207 // - "Fast Stable In-Place Sorting with O(n) Data Moves" J.I. Munro and 208 // V. Raman in Algorithmica (1996) 16, 115-160: 209 // This algorithm either needs additional 2n bits or works only if there 210 // are enough different elements available to encode some permutations 211 // which have to be undone later (so not stable on any input). 212 // - All the optimal in-place sorting/merging algorithms I found are either 213 // unstable or rely on enough different elements in each step to encode the 214 // performed block rearrangements. See also "In-Place Merging Algorithms", 215 // Denham Coates-Evely, Department of Computer Science, Kings College, 216 // January 2004 and the references in there. 217 // - Often "optimal" algorithms are optimal in the number of assignments 218 // but Interface has only Swap as operation. 219 220 // Stable sorts data in ascending order as determined by the Less method, 221 // while keeping the original order of equal elements. 222 // 223 // It makes one call to data.Len to determine n, O(n*log(n)) calls to 224 // data.Less and O(n*log(n)*log(n)) calls to data.Swap. 225 // 226 // Note: in many situations, the newer slices.SortStableFunc function is more 227 // ergonomic and runs faster. 228 func Stable(data Interface) { 229 stable(data, data.Len()) 230 } 231 232 /* 233 Complexity of Stable Sorting 234 235 236 Complexity of block swapping rotation 237 238 Each Swap puts one new element into its correct, final position. 239 Elements which reach their final position are no longer moved. 240 Thus block swapping rotation needs |u|+|v| calls to Swaps. 241 This is best possible as each element might need a move. 242 243 Pay attention when comparing to other optimal algorithms which 244 typically count the number of assignments instead of swaps: 245 E.g. the optimal algorithm of Dudzinski and Dydek for in-place 246 rotations uses O(u + v + gcd(u,v)) assignments which is 247 better than our O(3 * (u+v)) as gcd(u,v) <= u. 248 249 250 Stable sorting by SymMerge and BlockSwap rotations 251 252 SymMerg complexity for same size input M = N: 253 Calls to Less: O(M*log(N/M+1)) = O(N*log(2)) = O(N) 254 Calls to Swap: O((M+N)*log(M)) = O(2*N*log(N)) = O(N*log(N)) 255 256 (The following argument does not fuzz over a missing -1 or 257 other stuff which does not impact the final result). 258 259 Let n = data.Len(). Assume n = 2^k. 260 261 Plain merge sort performs log(n) = k iterations. 262 On iteration i the algorithm merges 2^(k-i) blocks, each of size 2^i. 263 264 Thus iteration i of merge sort performs: 265 Calls to Less O(2^(k-i) * 2^i) = O(2^k) = O(2^log(n)) = O(n) 266 Calls to Swap O(2^(k-i) * 2^i * log(2^i)) = O(2^k * i) = O(n*i) 267 268 In total k = log(n) iterations are performed; so in total: 269 Calls to Less O(log(n) * n) 270 Calls to Swap O(n + 2*n + 3*n + ... + (k-1)*n + k*n) 271 = O((k/2) * k * n) = O(n * k^2) = O(n * log^2(n)) 272 273 274 Above results should generalize to arbitrary n = 2^k + p 275 and should not be influenced by the initial insertion sort phase: 276 Insertion sort is O(n^2) on Swap and Less, thus O(bs^2) per block of 277 size bs at n/bs blocks: O(bs*n) Swaps and Less during insertion sort. 278 Merge sort iterations start at i = log(bs). With t = log(bs) constant: 279 Calls to Less O((log(n)-t) * n + bs*n) = O(log(n)*n + (bs-t)*n) 280 = O(n * log(n)) 281 Calls to Swap O(n * log^2(n) - (t^2+t)/2*n) = O(n * log^2(n)) 282 283 */ 284