Source file src/sort/sort.go
1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 //go:generate go run gen_sort_variants.go 6 7 // Package sort provides primitives for sorting slices and user-defined collections. 8 package sort 9 10 import ( 11 "math/bits" 12 "slices" 13 ) 14 15 // An implementation of Interface can be sorted by the routines in this package. 16 // The methods refer to elements of the underlying collection by integer index. 17 type Interface interface { 18 // Len is the number of elements in the collection. 19 Len() int 20 21 // Less reports whether the element with index i 22 // must sort before the element with index j. 23 // 24 // If both Less(i, j) and Less(j, i) are false, 25 // then the elements at index i and j are considered equal. 26 // Sort may place equal elements in any order in the final result, 27 // while Stable preserves the original input order of equal elements. 28 // 29 // Less must describe a transitive ordering: 30 // - if both Less(i, j) and Less(j, k) are true, then Less(i, k) must be true as well. 31 // - if both Less(i, j) and Less(j, k) are false, then Less(i, k) must be false as well. 32 // 33 // Note that floating-point comparison (the < operator on float32 or float64 values) 34 // is not a transitive ordering when not-a-number (NaN) values are involved. 35 // See Float64Slice.Less for a correct implementation for floating-point values. 36 Less(i, j int) bool 37 38 // Swap swaps the elements with indexes i and j. 39 Swap(i, j int) 40 } 41 42 // Sort sorts data in ascending order as determined by the Less method. 43 // It makes one call to data.Len to determine n and O(n*log(n)) calls to 44 // data.Less and data.Swap. The sort is not guaranteed to be stable. 45 // 46 // Note: in many situations, the newer [slices.SortFunc] function is more 47 // ergonomic and runs faster. 48 func Sort(data Interface) { 49 n := data.Len() 50 if n <= 1 { 51 return 52 } 53 limit := bits.Len(uint(n)) 54 pdqsort(data, 0, n, limit) 55 } 56 57 type sortedHint int // hint for pdqsort when choosing the pivot 58 59 const ( 60 unknownHint sortedHint = iota 61 increasingHint 62 decreasingHint 63 ) 64 65 // xorshift paper: https://www.jstatsoft.org/article/view/v008i14/xorshift.pdf 66 type xorshift uint64 67 68 func (r *xorshift) Next() uint64 { 69 *r ^= *r << 13 70 *r ^= *r >> 7 71 *r ^= *r << 17 72 return uint64(*r) 73 } 74 75 func nextPowerOfTwo(length int) uint { 76 shift := uint(bits.Len(uint(length))) 77 return uint(1 << shift) 78 } 79 80 // lessSwap is a pair of Less and Swap function for use with the 81 // auto-generated func-optimized variant of sort.go in 82 // zfuncversion.go. 83 type lessSwap struct { 84 Less func(i, j int) bool 85 Swap func(i, j int) 86 } 87 88 type reverse struct { 89 // This embedded Interface permits Reverse to use the methods of 90 // another Interface implementation. 91 Interface 92 } 93 94 // Less returns the opposite of the embedded implementation's Less method. 95 func (r reverse) Less(i, j int) bool { 96 return r.Interface.Less(j, i) 97 } 98 99 // Reverse returns the reverse order for data. 100 func Reverse(data Interface) Interface { 101 return &reverse{data} 102 } 103 104 // IsSorted reports whether data is sorted. 105 // 106 // Note: in many situations, the newer [slices.IsSortedFunc] function is more 107 // ergonomic and runs faster. 108 func IsSorted(data Interface) bool { 109 n := data.Len() 110 for i := n - 1; i > 0; i-- { 111 if data.Less(i, i-1) { 112 return false 113 } 114 } 115 return true 116 } 117 118 // Convenience types for common cases 119 120 // IntSlice attaches the methods of Interface to []int, sorting in increasing order. 121 type IntSlice []int 122 123 func (x IntSlice) Len() int { return len(x) } 124 func (x IntSlice) Less(i, j int) bool { return x[i] < x[j] } 125 func (x IntSlice) Swap(i, j int) { x[i], x[j] = x[j], x[i] } 126 127 // Sort is a convenience method: x.Sort() calls Sort(x). 128 func (x IntSlice) Sort() { Sort(x) } 129 130 // Float64Slice implements Interface for a []float64, sorting in increasing order, 131 // with not-a-number (NaN) values ordered before other values. 132 type Float64Slice []float64 133 134 func (x Float64Slice) Len() int { return len(x) } 135 136 // Less reports whether x[i] should be ordered before x[j], as required by the sort Interface. 137 // Note that floating-point comparison by itself is not a transitive relation: it does not 138 // report a consistent ordering for not-a-number (NaN) values. 139 // This implementation of Less places NaN values before any others, by using: 140 // 141 // x[i] < x[j] || (math.IsNaN(x[i]) && !math.IsNaN(x[j])) 142 func (x Float64Slice) Less(i, j int) bool { return x[i] < x[j] || (isNaN(x[i]) && !isNaN(x[j])) } 143 func (x Float64Slice) Swap(i, j int) { x[i], x[j] = x[j], x[i] } 144 145 // isNaN is a copy of math.IsNaN to avoid a dependency on the math package. 146 func isNaN(f float64) bool { 147 return f != f 148 } 149 150 // Sort is a convenience method: x.Sort() calls Sort(x). 151 func (x Float64Slice) Sort() { Sort(x) } 152 153 // StringSlice attaches the methods of Interface to []string, sorting in increasing order. 154 type StringSlice []string 155 156 func (x StringSlice) Len() int { return len(x) } 157 func (x StringSlice) Less(i, j int) bool { return x[i] < x[j] } 158 func (x StringSlice) Swap(i, j int) { x[i], x[j] = x[j], x[i] } 159 160 // Sort is a convenience method: x.Sort() calls Sort(x). 161 func (x StringSlice) Sort() { Sort(x) } 162 163 // Convenience wrappers for common cases 164 165 // Ints sorts a slice of ints in increasing order. 166 // 167 // Note: as of Go 1.22, this function simply calls [slices.Sort]. 168 func Ints(x []int) { slices.Sort(x) } 169 170 // Float64s sorts a slice of float64s in increasing order. 171 // Not-a-number (NaN) values are ordered before other values. 172 // 173 // Note: as of Go 1.22, this function simply calls [slices.Sort]. 174 func Float64s(x []float64) { slices.Sort(x) } 175 176 // Strings sorts a slice of strings in increasing order. 177 // 178 // Note: as of Go 1.22, this function simply calls [slices.Sort]. 179 func Strings(x []string) { slices.Sort(x) } 180 181 // IntsAreSorted reports whether the slice x is sorted in increasing order. 182 // 183 // Note: as of Go 1.22, this function simply calls [slices.IsSorted]. 184 func IntsAreSorted(x []int) bool { return slices.IsSorted(x) } 185 186 // Float64sAreSorted reports whether the slice x is sorted in increasing order, 187 // with not-a-number (NaN) values before any other values. 188 // 189 // Note: as of Go 1.22, this function simply calls [slices.IsSorted]. 190 func Float64sAreSorted(x []float64) bool { return slices.IsSorted(x) } 191 192 // StringsAreSorted reports whether the slice x is sorted in increasing order. 193 // 194 // Note: as of Go 1.22, this function simply calls [slices.IsSorted]. 195 func StringsAreSorted(x []string) bool { return slices.IsSorted(x) } 196 197 // Notes on stable sorting: 198 // The used algorithms are simple and provable correct on all input and use 199 // only logarithmic additional stack space. They perform well if compared 200 // experimentally to other stable in-place sorting algorithms. 201 // 202 // Remarks on other algorithms evaluated: 203 // - GCC's 4.6.3 stable_sort with merge_without_buffer from libstdc++: 204 // Not faster. 205 // - GCC's __rotate for block rotations: Not faster. 206 // - "Practical in-place mergesort" from Jyrki Katajainen, Tomi A. Pasanen 207 // and Jukka Teuhola; Nordic Journal of Computing 3,1 (1996), 27-40: 208 // The given algorithms are in-place, number of Swap and Assignments 209 // grow as n log n but the algorithm is not stable. 210 // - "Fast Stable In-Place Sorting with O(n) Data Moves" J.I. Munro and 211 // V. Raman in Algorithmica (1996) 16, 115-160: 212 // This algorithm either needs additional 2n bits or works only if there 213 // are enough different elements available to encode some permutations 214 // which have to be undone later (so not stable on any input). 215 // - All the optimal in-place sorting/merging algorithms I found are either 216 // unstable or rely on enough different elements in each step to encode the 217 // performed block rearrangements. See also "In-Place Merging Algorithms", 218 // Denham Coates-Evely, Department of Computer Science, Kings College, 219 // January 2004 and the references in there. 220 // - Often "optimal" algorithms are optimal in the number of assignments 221 // but Interface has only Swap as operation. 222 223 // Stable sorts data in ascending order as determined by the Less method, 224 // while keeping the original order of equal elements. 225 // 226 // It makes one call to data.Len to determine n, O(n*log(n)) calls to 227 // data.Less and O(n*log(n)*log(n)) calls to data.Swap. 228 // 229 // Note: in many situations, the newer slices.SortStableFunc function is more 230 // ergonomic and runs faster. 231 func Stable(data Interface) { 232 stable(data, data.Len()) 233 } 234 235 /* 236 Complexity of Stable Sorting 237 238 239 Complexity of block swapping rotation 240 241 Each Swap puts one new element into its correct, final position. 242 Elements which reach their final position are no longer moved. 243 Thus block swapping rotation needs |u|+|v| calls to Swaps. 244 This is best possible as each element might need a move. 245 246 Pay attention when comparing to other optimal algorithms which 247 typically count the number of assignments instead of swaps: 248 E.g. the optimal algorithm of Dudzinski and Dydek for in-place 249 rotations uses O(u + v + gcd(u,v)) assignments which is 250 better than our O(3 * (u+v)) as gcd(u,v) <= u. 251 252 253 Stable sorting by SymMerge and BlockSwap rotations 254 255 SymMerg complexity for same size input M = N: 256 Calls to Less: O(M*log(N/M+1)) = O(N*log(2)) = O(N) 257 Calls to Swap: O((M+N)*log(M)) = O(2*N*log(N)) = O(N*log(N)) 258 259 (The following argument does not fuzz over a missing -1 or 260 other stuff which does not impact the final result). 261 262 Let n = data.Len(). Assume n = 2^k. 263 264 Plain merge sort performs log(n) = k iterations. 265 On iteration i the algorithm merges 2^(k-i) blocks, each of size 2^i. 266 267 Thus iteration i of merge sort performs: 268 Calls to Less O(2^(k-i) * 2^i) = O(2^k) = O(2^log(n)) = O(n) 269 Calls to Swap O(2^(k-i) * 2^i * log(2^i)) = O(2^k * i) = O(n*i) 270 271 In total k = log(n) iterations are performed; so in total: 272 Calls to Less O(log(n) * n) 273 Calls to Swap O(n + 2*n + 3*n + ... + (k-1)*n + k*n) 274 = O((k/2) * k * n) = O(n * k^2) = O(n * log^2(n)) 275 276 277 Above results should generalize to arbitrary n = 2^k + p 278 and should not be influenced by the initial insertion sort phase: 279 Insertion sort is O(n^2) on Swap and Less, thus O(bs^2) per block of 280 size bs at n/bs blocks: O(bs*n) Swaps and Less during insertion sort. 281 Merge sort iterations start at i = log(bs). With t = log(bs) constant: 282 Calls to Less O((log(n)-t) * n + bs*n) = O(log(n)*n + (bs-t)*n) 283 = O(n * log(n)) 284 Calls to Swap O(n * log^2(n) - (t^2+t)/2*n) = O(n * log^2(n)) 285 286 */ 287