Source file src/unicode/utf16/utf16_test.go

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package utf16_test
     6  
     7  import (
     8  	"internal/testenv"
     9  	"slices"
    10  	"testing"
    11  	"unicode"
    12  	. "unicode/utf16"
    13  )
    14  
    15  // Validate the constants redefined from unicode.
    16  func TestConstants(t *testing.T) {
    17  	if MaxRune != unicode.MaxRune {
    18  		t.Errorf("utf16.maxRune is wrong: %x should be %x", MaxRune, unicode.MaxRune)
    19  	}
    20  	if ReplacementChar != unicode.ReplacementChar {
    21  		t.Errorf("utf16.replacementChar is wrong: %x should be %x", ReplacementChar, unicode.ReplacementChar)
    22  	}
    23  }
    24  
    25  func TestRuneLen(t *testing.T) {
    26  	for _, tt := range []struct {
    27  		r      rune
    28  		length int
    29  	}{
    30  		{0, 1},
    31  		{Surr1 - 1, 1},
    32  		{Surr3, 1},
    33  		{SurrSelf - 1, 1},
    34  		{SurrSelf, 2},
    35  		{MaxRune, 2},
    36  		{MaxRune + 1, -1},
    37  		{-1, -1},
    38  	} {
    39  		if length := RuneLen(tt.r); length != tt.length {
    40  			t.Errorf("RuneLen(%#U) = %d, want %d", tt.r, length, tt.length)
    41  		}
    42  	}
    43  }
    44  
    45  type encodeTest struct {
    46  	in  []rune
    47  	out []uint16
    48  }
    49  
    50  var encodeTests = []encodeTest{
    51  	{[]rune{1, 2, 3, 4}, []uint16{1, 2, 3, 4}},
    52  	{[]rune{0xffff, 0x10000, 0x10001, 0x12345, 0x10ffff},
    53  		[]uint16{0xffff, 0xd800, 0xdc00, 0xd800, 0xdc01, 0xd808, 0xdf45, 0xdbff, 0xdfff}},
    54  	{[]rune{'a', 'b', 0xd7ff, 0xd800, 0xdfff, 0xe000, 0x110000, -1},
    55  		[]uint16{'a', 'b', 0xd7ff, 0xfffd, 0xfffd, 0xe000, 0xfffd, 0xfffd}},
    56  }
    57  
    58  func TestEncode(t *testing.T) {
    59  	for _, tt := range encodeTests {
    60  		out := Encode(tt.in)
    61  		if !slices.Equal(out, tt.out) {
    62  			t.Errorf("Encode(%x) = %x; want %x", tt.in, out, tt.out)
    63  		}
    64  	}
    65  }
    66  
    67  func TestAppendRune(t *testing.T) {
    68  	for _, tt := range encodeTests {
    69  		var out []uint16
    70  		for _, u := range tt.in {
    71  			out = AppendRune(out, u)
    72  		}
    73  		if !slices.Equal(out, tt.out) {
    74  			t.Errorf("AppendRune(%x) = %x; want %x", tt.in, out, tt.out)
    75  		}
    76  	}
    77  }
    78  
    79  func TestEncodeRune(t *testing.T) {
    80  	for i, tt := range encodeTests {
    81  		j := 0
    82  		for _, r := range tt.in {
    83  			r1, r2 := EncodeRune(r)
    84  			if r < 0x10000 || r > unicode.MaxRune {
    85  				if j >= len(tt.out) {
    86  					t.Errorf("#%d: ran out of tt.out", i)
    87  					break
    88  				}
    89  				if r1 != unicode.ReplacementChar || r2 != unicode.ReplacementChar {
    90  					t.Errorf("EncodeRune(%#x) = %#x, %#x; want 0xfffd, 0xfffd", r, r1, r2)
    91  				}
    92  				j++
    93  			} else {
    94  				if j+1 >= len(tt.out) {
    95  					t.Errorf("#%d: ran out of tt.out", i)
    96  					break
    97  				}
    98  				if r1 != rune(tt.out[j]) || r2 != rune(tt.out[j+1]) {
    99  					t.Errorf("EncodeRune(%#x) = %#x, %#x; want %#x, %#x", r, r1, r2, tt.out[j], tt.out[j+1])
   100  				}
   101  				j += 2
   102  				dec := DecodeRune(r1, r2)
   103  				if dec != r {
   104  					t.Errorf("DecodeRune(%#x, %#x) = %#x; want %#x", r1, r2, dec, r)
   105  				}
   106  			}
   107  		}
   108  		if j != len(tt.out) {
   109  			t.Errorf("#%d: EncodeRune didn't generate enough output", i)
   110  		}
   111  	}
   112  }
   113  
   114  type decodeTest struct {
   115  	in  []uint16
   116  	out []rune
   117  }
   118  
   119  var decodeTests = []decodeTest{
   120  	{[]uint16{1, 2, 3, 4}, []rune{1, 2, 3, 4}},
   121  	{[]uint16{0xffff, 0xd800, 0xdc00, 0xd800, 0xdc01, 0xd808, 0xdf45, 0xdbff, 0xdfff},
   122  		[]rune{0xffff, 0x10000, 0x10001, 0x12345, 0x10ffff}},
   123  	{[]uint16{0xd800, 'a'}, []rune{0xfffd, 'a'}},
   124  	{[]uint16{0xdfff}, []rune{0xfffd}},
   125  }
   126  
   127  func TestAllocationsDecode(t *testing.T) {
   128  	testenv.SkipIfOptimizationOff(t)
   129  
   130  	for _, tt := range decodeTests {
   131  		allocs := testing.AllocsPerRun(10, func() {
   132  			out := Decode(tt.in)
   133  			if out == nil {
   134  				t.Errorf("Decode(%x) = nil", tt.in)
   135  			}
   136  		})
   137  		if allocs > 0 {
   138  			t.Errorf("Decode allocated %v times", allocs)
   139  		}
   140  	}
   141  }
   142  
   143  func TestDecode(t *testing.T) {
   144  	for _, tt := range decodeTests {
   145  		out := Decode(tt.in)
   146  		if !slices.Equal(out, tt.out) {
   147  			t.Errorf("Decode(%x) = %x; want %x", tt.in, out, tt.out)
   148  		}
   149  	}
   150  }
   151  
   152  var decodeRuneTests = []struct {
   153  	r1, r2 rune
   154  	want   rune
   155  }{
   156  	{0xd800, 0xdc00, 0x10000},
   157  	{0xd800, 0xdc01, 0x10001},
   158  	{0xd808, 0xdf45, 0x12345},
   159  	{0xdbff, 0xdfff, 0x10ffff},
   160  	{0xd800, 'a', 0xfffd}, // illegal, replacement rune substituted
   161  }
   162  
   163  func TestDecodeRune(t *testing.T) {
   164  	for i, tt := range decodeRuneTests {
   165  		got := DecodeRune(tt.r1, tt.r2)
   166  		if got != tt.want {
   167  			t.Errorf("%d: DecodeRune(%q, %q) = %v; want %v", i, tt.r1, tt.r2, got, tt.want)
   168  		}
   169  	}
   170  }
   171  
   172  var surrogateTests = []struct {
   173  	r    rune
   174  	want bool
   175  }{
   176  	// from https://en.wikipedia.org/wiki/UTF-16
   177  	{'\u007A', false},     // LATIN SMALL LETTER Z
   178  	{'\u6C34', false},     // CJK UNIFIED IDEOGRAPH-6C34 (water)
   179  	{'\uFEFF', false},     // Byte Order Mark
   180  	{'\U00010000', false}, // LINEAR B SYLLABLE B008 A (first non-BMP code point)
   181  	{'\U0001D11E', false}, // MUSICAL SYMBOL G CLEF
   182  	{'\U0010FFFD', false}, // PRIVATE USE CHARACTER-10FFFD (last Unicode code point)
   183  
   184  	{rune(0xd7ff), false}, // surr1-1
   185  	{rune(0xd800), true},  // surr1
   186  	{rune(0xdc00), true},  // surr2
   187  	{rune(0xe000), false}, // surr3
   188  	{rune(0xdfff), true},  // surr3-1
   189  }
   190  
   191  func TestIsSurrogate(t *testing.T) {
   192  	for i, tt := range surrogateTests {
   193  		got := IsSurrogate(tt.r)
   194  		if got != tt.want {
   195  			t.Errorf("%d: IsSurrogate(%q) = %v; want %v", i, tt.r, got, tt.want)
   196  		}
   197  	}
   198  }
   199  
   200  func BenchmarkDecodeValidASCII(b *testing.B) {
   201  	// "hello world"
   202  	data := []uint16{104, 101, 108, 108, 111, 32, 119, 111, 114, 108, 100}
   203  	for i := 0; i < b.N; i++ {
   204  		Decode(data)
   205  	}
   206  }
   207  
   208  func BenchmarkDecodeValidJapaneseChars(b *testing.B) {
   209  	// "日本語日本語日本語"
   210  	data := []uint16{26085, 26412, 35486, 26085, 26412, 35486, 26085, 26412, 35486}
   211  	for i := 0; i < b.N; i++ {
   212  		Decode(data)
   213  	}
   214  }
   215  
   216  func BenchmarkDecodeRune(b *testing.B) {
   217  	rs := make([]rune, 10)
   218  	// U+1D4D0 to U+1D4D4: MATHEMATICAL BOLD SCRIPT CAPITAL LETTERS
   219  	for i, u := range []rune{'𝓐', '𝓑', '𝓒', '𝓓', '𝓔'} {
   220  		rs[2*i], rs[2*i+1] = EncodeRune(u)
   221  	}
   222  
   223  	b.ResetTimer()
   224  	for i := 0; i < b.N; i++ {
   225  		for j := 0; j < 5; j++ {
   226  			DecodeRune(rs[2*j], rs[2*j+1])
   227  		}
   228  	}
   229  }
   230  
   231  func BenchmarkEncodeValidASCII(b *testing.B) {
   232  	data := []rune{'h', 'e', 'l', 'l', 'o'}
   233  	for i := 0; i < b.N; i++ {
   234  		Encode(data)
   235  	}
   236  }
   237  
   238  func BenchmarkEncodeValidJapaneseChars(b *testing.B) {
   239  	data := []rune{'日', '本', '語'}
   240  	for i := 0; i < b.N; i++ {
   241  		Encode(data)
   242  	}
   243  }
   244  
   245  func BenchmarkAppendRuneValidASCII(b *testing.B) {
   246  	data := []rune{'h', 'e', 'l', 'l', 'o'}
   247  	a := make([]uint16, 0, len(data)*2)
   248  	for i := 0; i < b.N; i++ {
   249  		for _, u := range data {
   250  			a = AppendRune(a, u)
   251  		}
   252  		a = a[:0]
   253  	}
   254  }
   255  
   256  func BenchmarkAppendRuneValidJapaneseChars(b *testing.B) {
   257  	data := []rune{'日', '本', '語'}
   258  	a := make([]uint16, 0, len(data)*2)
   259  	for i := 0; i < b.N; i++ {
   260  		for _, u := range data {
   261  			a = AppendRune(a, u)
   262  		}
   263  		a = a[:0]
   264  	}
   265  }
   266  
   267  func BenchmarkEncodeRune(b *testing.B) {
   268  	for i := 0; i < b.N; i++ {
   269  		for _, u := range []rune{'𝓐', '𝓑', '𝓒', '𝓓', '𝓔'} {
   270  			EncodeRune(u)
   271  		}
   272  	}
   273  }
   274  

View as plain text