Source file src/syscall/wtf8_windows_test.go

     1  // Copyright 2023 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package syscall_test
     6  
     7  import (
     8  	"fmt"
     9  	"slices"
    10  	"syscall"
    11  	"testing"
    12  	"unicode/utf16"
    13  	"unicode/utf8"
    14  	"unsafe"
    15  )
    16  
    17  var wtf8tests = []struct {
    18  	str  string
    19  	wstr []uint16
    20  }{
    21  	{
    22  		str:  "\x00",
    23  		wstr: []uint16{0x00},
    24  	},
    25  	{
    26  		str:  "\x5C",
    27  		wstr: []uint16{0x5C},
    28  	},
    29  	{
    30  		str:  "\x7F",
    31  		wstr: []uint16{0x7F},
    32  	},
    33  
    34  	// 2-byte
    35  	{
    36  		str:  "\xC2\x80",
    37  		wstr: []uint16{0x80},
    38  	},
    39  	{
    40  		str:  "\xD7\x8A",
    41  		wstr: []uint16{0x05CA},
    42  	},
    43  	{
    44  		str:  "\xDF\xBF",
    45  		wstr: []uint16{0x07FF},
    46  	},
    47  
    48  	// 3-byte
    49  	{
    50  		str:  "\xE0\xA0\x80",
    51  		wstr: []uint16{0x0800},
    52  	},
    53  	{
    54  		str:  "\xE2\xB0\xBC",
    55  		wstr: []uint16{0x2C3C},
    56  	},
    57  	{
    58  		str:  "\xEF\xBF\xBF",
    59  		wstr: []uint16{0xFFFF},
    60  	},
    61  	// unmatched surrogate halves
    62  	// high surrogates: 0xD800 to 0xDBFF
    63  	{
    64  		str:  "\xED\xA0\x80",
    65  		wstr: []uint16{0xD800},
    66  	},
    67  	{
    68  		// "High surrogate followed by another high surrogate"
    69  		str:  "\xED\xA0\x80\xED\xA0\x80",
    70  		wstr: []uint16{0xD800, 0xD800},
    71  	},
    72  	{
    73  		// "High surrogate followed by a symbol that is not a surrogate"
    74  		str:  string([]byte{0xED, 0xA0, 0x80, 0xA}),
    75  		wstr: []uint16{0xD800, 0xA},
    76  	},
    77  	{
    78  		// "Unmatched high surrogate, followed by a surrogate pair, followed by an unmatched high surrogate"
    79  		str:  string([]byte{0xED, 0xA0, 0x80, 0xF0, 0x9D, 0x8C, 0x86, 0xED, 0xA0, 0x80}),
    80  		wstr: []uint16{0xD800, 0xD834, 0xDF06, 0xD800},
    81  	},
    82  	{
    83  		str:  "\xED\xA6\xAF",
    84  		wstr: []uint16{0xD9AF},
    85  	},
    86  	{
    87  		str:  "\xED\xAF\xBF",
    88  		wstr: []uint16{0xDBFF},
    89  	},
    90  	// low surrogates: 0xDC00 to 0xDFFF
    91  	{
    92  		str:  "\xED\xB0\x80",
    93  		wstr: []uint16{0xDC00},
    94  	},
    95  	{
    96  		// "Low surrogate followed by another low surrogate"
    97  		str:  "\xED\xB0\x80\xED\xB0\x80",
    98  		wstr: []uint16{0xDC00, 0xDC00},
    99  	},
   100  	{
   101  		// "Low surrogate followed by a symbol that is not a surrogate"
   102  		str:  string([]byte{0xED, 0xB0, 0x80, 0xA}),
   103  		wstr: []uint16{0xDC00, 0xA},
   104  	},
   105  	{
   106  		// "Unmatched low surrogate, followed by a surrogate pair, followed by an unmatched low surrogate"
   107  		str:  string([]byte{0xED, 0xB0, 0x80, 0xF0, 0x9D, 0x8C, 0x86, 0xED, 0xB0, 0x80}),
   108  		wstr: []uint16{0xDC00, 0xD834, 0xDF06, 0xDC00},
   109  	},
   110  	{
   111  		str:  "\xED\xBB\xAE",
   112  		wstr: []uint16{0xDEEE},
   113  	},
   114  	{
   115  		str:  "\xED\xBF\xBF",
   116  		wstr: []uint16{0xDFFF},
   117  	},
   118  
   119  	// 4-byte
   120  	{
   121  		str:  "\xF0\x90\x80\x80",
   122  		wstr: []uint16{0xD800, 0xDC00},
   123  	},
   124  	{
   125  		str:  "\xF0\x9D\x8C\x86",
   126  		wstr: []uint16{0xD834, 0xDF06},
   127  	},
   128  	{
   129  		str:  "\xF4\x8F\xBF\xBF",
   130  		wstr: []uint16{0xDBFF, 0xDFFF},
   131  	},
   132  }
   133  
   134  func TestWTF16Rountrip(t *testing.T) {
   135  	for _, tt := range wtf8tests {
   136  		t.Run(fmt.Sprintf("%X", tt.str), func(t *testing.T) {
   137  			got := syscall.EncodeWTF16(tt.str, nil)
   138  			got2 := string(syscall.DecodeWTF16(got, nil))
   139  			if got2 != tt.str {
   140  				t.Errorf("got:\n%s\nwant:\n%s", got2, tt.str)
   141  			}
   142  		})
   143  	}
   144  }
   145  
   146  func TestWTF16Golden(t *testing.T) {
   147  	for _, tt := range wtf8tests {
   148  		t.Run(fmt.Sprintf("%X", tt.str), func(t *testing.T) {
   149  			got := syscall.EncodeWTF16(tt.str, nil)
   150  			if !slices.Equal(got, tt.wstr) {
   151  				t.Errorf("got:\n%v\nwant:\n%v", got, tt.wstr)
   152  			}
   153  		})
   154  	}
   155  }
   156  
   157  func FuzzEncodeWTF16(f *testing.F) {
   158  	for _, tt := range wtf8tests {
   159  		f.Add(tt.str)
   160  	}
   161  	f.Fuzz(func(t *testing.T, b string) {
   162  		// test that there are no panics
   163  		got := syscall.EncodeWTF16(b, nil)
   164  		syscall.DecodeWTF16(got, nil)
   165  		if utf8.ValidString(b) {
   166  			// if the input is a valid UTF-8 string, then
   167  			// test that syscall.EncodeWTF16 behaves as
   168  			// utf16.Encode
   169  			want := utf16.Encode([]rune(b))
   170  			if !slices.Equal(got, want) {
   171  				t.Errorf("got:\n%v\nwant:\n%v", got, want)
   172  			}
   173  		}
   174  	})
   175  }
   176  
   177  func FuzzDecodeWTF16(f *testing.F) {
   178  	for _, tt := range wtf8tests {
   179  		b := unsafe.Slice((*uint8)(unsafe.Pointer(unsafe.SliceData(tt.wstr))), len(tt.wstr)*2)
   180  		f.Add(b)
   181  	}
   182  	f.Fuzz(func(t *testing.T, b []byte) {
   183  		u16 := unsafe.Slice((*uint16)(unsafe.Pointer(unsafe.SliceData(b))), len(b)/2)
   184  		got := syscall.DecodeWTF16(u16, nil)
   185  		if utf8.Valid(got) {
   186  			// if the input is a valid UTF-8 string, then
   187  			// test that syscall.DecodeWTF16 behaves as
   188  			// utf16.Decode
   189  			want := utf16.Decode(u16)
   190  			if string(got) != string(want) {
   191  				t.Errorf("got:\n%s\nwant:\n%s", string(got), string(want))
   192  			}
   193  		}
   194  		// WTF-8 should always roundtrip
   195  		got2 := syscall.EncodeWTF16(string(got), nil)
   196  		if !slices.Equal(got2, u16) {
   197  			t.Errorf("got:\n%v\nwant:\n%v", got2, u16)
   198  		}
   199  	})
   200  }
   201  

View as plain text