Source file src/mime/mediatype.go

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package mime
     6  
     7  import (
     8  	"errors"
     9  	"fmt"
    10  	"maps"
    11  	"slices"
    12  	"strings"
    13  	"unicode"
    14  )
    15  
    16  // FormatMediaType serializes mediatype t and the parameters
    17  // param as a media type conforming to RFC 2045 and RFC 2616.
    18  // The type and parameter names are written in lower-case.
    19  // When any of the arguments result in a standard violation then
    20  // FormatMediaType returns the empty string.
    21  func FormatMediaType(t string, param map[string]string) string {
    22  	var b strings.Builder
    23  	if major, sub, ok := strings.Cut(t, "/"); !ok {
    24  		if !isToken(t) {
    25  			return ""
    26  		}
    27  		b.WriteString(strings.ToLower(t))
    28  	} else {
    29  		if !isToken(major) || !isToken(sub) {
    30  			return ""
    31  		}
    32  		b.WriteString(strings.ToLower(major))
    33  		b.WriteByte('/')
    34  		b.WriteString(strings.ToLower(sub))
    35  	}
    36  
    37  	for _, attribute := range slices.Sorted(maps.Keys(param)) {
    38  		value := param[attribute]
    39  		b.WriteByte(';')
    40  		b.WriteByte(' ')
    41  		if !isToken(attribute) {
    42  			return ""
    43  		}
    44  		b.WriteString(strings.ToLower(attribute))
    45  
    46  		needEnc := needsEncoding(value)
    47  		if needEnc {
    48  			// RFC 2231 section 4
    49  			b.WriteByte('*')
    50  		}
    51  		b.WriteByte('=')
    52  
    53  		if needEnc {
    54  			b.WriteString("utf-8''")
    55  
    56  			offset := 0
    57  			for index := 0; index < len(value); index++ {
    58  				ch := value[index]
    59  				// {RFC 2231 section 7}
    60  				// attribute-char := <any (US-ASCII) CHAR except SPACE, CTLs, "*", "'", "%", or tspecials>
    61  				if ch <= ' ' || ch >= 0x7F ||
    62  					ch == '*' || ch == '\'' || ch == '%' ||
    63  					isTSpecial(rune(ch)) {
    64  
    65  					b.WriteString(value[offset:index])
    66  					offset = index + 1
    67  
    68  					b.WriteByte('%')
    69  					b.WriteByte(upperhex[ch>>4])
    70  					b.WriteByte(upperhex[ch&0x0F])
    71  				}
    72  			}
    73  			b.WriteString(value[offset:])
    74  			continue
    75  		}
    76  
    77  		if isToken(value) {
    78  			b.WriteString(value)
    79  			continue
    80  		}
    81  
    82  		b.WriteByte('"')
    83  		offset := 0
    84  		for index := 0; index < len(value); index++ {
    85  			character := value[index]
    86  			if character == '"' || character == '\\' {
    87  				b.WriteString(value[offset:index])
    88  				offset = index
    89  				b.WriteByte('\\')
    90  			}
    91  		}
    92  		b.WriteString(value[offset:])
    93  		b.WriteByte('"')
    94  	}
    95  	return b.String()
    96  }
    97  
    98  func checkMediaTypeDisposition(s string) error {
    99  	typ, rest := consumeToken(s)
   100  	if typ == "" {
   101  		return errors.New("mime: no media type")
   102  	}
   103  	if rest == "" {
   104  		return nil
   105  	}
   106  	if !strings.HasPrefix(rest, "/") {
   107  		return errors.New("mime: expected slash after first token")
   108  	}
   109  	subtype, rest := consumeToken(rest[1:])
   110  	if subtype == "" {
   111  		return errors.New("mime: expected token after slash")
   112  	}
   113  	if rest != "" {
   114  		return errors.New("mime: unexpected content after media subtype")
   115  	}
   116  	return nil
   117  }
   118  
// ErrInvalidMediaParameter is returned by [ParseMediaType] if
// the media type value was found but there was an error parsing
// the optional parameters.
var ErrInvalidMediaParameter = errors.New("mime: invalid media parameter")
   123  
   124  // ParseMediaType parses a media type value and any optional
   125  // parameters, per RFC 1521.  Media types are the values in
   126  // Content-Type and Content-Disposition headers (RFC 2183).
   127  // On success, ParseMediaType returns the media type converted
   128  // to lowercase and trimmed of white space and a non-nil map.
   129  // If there is an error parsing the optional parameter,
   130  // the media type will be returned along with the error
   131  // [ErrInvalidMediaParameter].
   132  // The returned map, params, maps from the lowercase
   133  // attribute to the attribute value with its case preserved.
   134  func ParseMediaType(v string) (mediatype string, params map[string]string, err error) {
   135  	base, _, _ := strings.Cut(v, ";")
   136  	mediatype = strings.TrimSpace(strings.ToLower(base))
   137  
   138  	err = checkMediaTypeDisposition(mediatype)
   139  	if err != nil {
   140  		return "", nil, err
   141  	}
   142  
   143  	params = make(map[string]string)
   144  
   145  	// Map of base parameter name -> parameter name -> value
   146  	// for parameters containing a '*' character.
   147  	// Lazily initialized.
   148  	var continuation map[string]map[string]string
   149  
   150  	v = v[len(base):]
   151  	for len(v) > 0 {
   152  		v = strings.TrimLeftFunc(v, unicode.IsSpace)
   153  		if len(v) == 0 {
   154  			break
   155  		}
   156  		key, value, rest := consumeMediaParam(v)
   157  		if key == "" {
   158  			if strings.TrimSpace(rest) == ";" {
   159  				// Ignore trailing semicolons.
   160  				// Not an error.
   161  				break
   162  			}
   163  			// Parse error.
   164  			return mediatype, nil, ErrInvalidMediaParameter
   165  		}
   166  
   167  		pmap := params
   168  		if baseName, _, ok := strings.Cut(key, "*"); ok {
   169  			if continuation == nil {
   170  				continuation = make(map[string]map[string]string)
   171  			}
   172  			var ok bool
   173  			if pmap, ok = continuation[baseName]; !ok {
   174  				continuation[baseName] = make(map[string]string)
   175  				pmap = continuation[baseName]
   176  			}
   177  		}
   178  		if v, exists := pmap[key]; exists && v != value {
   179  			// Duplicate parameter names are incorrect, but we allow them if they are equal.
   180  			return "", nil, errors.New("mime: duplicate parameter name")
   181  		}
   182  		pmap[key] = value
   183  		v = rest
   184  	}
   185  
   186  	// Stitch together any continuations or things with stars
   187  	// (i.e. RFC 2231 things with stars: "foo*0" or "foo*")
   188  	var buf strings.Builder
   189  	for key, pieceMap := range continuation {
   190  		singlePartKey := key + "*"
   191  		if v, ok := pieceMap[singlePartKey]; ok {
   192  			if decv, ok := decode2231Enc(v); ok {
   193  				params[key] = decv
   194  			}
   195  			continue
   196  		}
   197  
   198  		buf.Reset()
   199  		valid := false
   200  		for n := 0; ; n++ {
   201  			simplePart := fmt.Sprintf("%s*%d", key, n)
   202  			if v, ok := pieceMap[simplePart]; ok {
   203  				valid = true
   204  				buf.WriteString(v)
   205  				continue
   206  			}
   207  			encodedPart := simplePart + "*"
   208  			v, ok := pieceMap[encodedPart]
   209  			if !ok {
   210  				break
   211  			}
   212  			valid = true
   213  			if n == 0 {
   214  				if decv, ok := decode2231Enc(v); ok {
   215  					buf.WriteString(decv)
   216  				}
   217  			} else {
   218  				decv, _ := percentHexUnescape(v)
   219  				buf.WriteString(decv)
   220  			}
   221  		}
   222  		if valid {
   223  			params[key] = buf.String()
   224  		}
   225  	}
   226  
   227  	return
   228  }
   229  
   230  func decode2231Enc(v string) (string, bool) {
   231  	sv := strings.SplitN(v, "'", 3)
   232  	if len(sv) != 3 {
   233  		return "", false
   234  	}
   235  	// TODO: ignoring lang in sv[1] for now. If anybody needs it we'll
   236  	// need to decide how to expose it in the API. But I'm not sure
   237  	// anybody uses it in practice.
   238  	charset := strings.ToLower(sv[0])
   239  	if len(charset) == 0 {
   240  		return "", false
   241  	}
   242  	if charset != "us-ascii" && charset != "utf-8" {
   243  		// TODO: unsupported encoding
   244  		return "", false
   245  	}
   246  	encv, err := percentHexUnescape(sv[2])
   247  	if err != nil {
   248  		return "", false
   249  	}
   250  	return encv, true
   251  }
   252  
   253  func isNotTokenChar(r rune) bool {
   254  	return !isTokenChar(r)
   255  }
   256  
   257  // consumeToken consumes a token from the beginning of provided
   258  // string, per RFC 2045 section 5.1 (referenced from 2183), and return
   259  // the token consumed and the rest of the string. Returns ("", v) on
   260  // failure to consume at least one character.
   261  func consumeToken(v string) (token, rest string) {
   262  	notPos := strings.IndexFunc(v, isNotTokenChar)
   263  	if notPos == -1 {
   264  		return v, ""
   265  	}
   266  	if notPos == 0 {
   267  		return "", v
   268  	}
   269  	return v[0:notPos], v[notPos:]
   270  }
   271  
   272  // consumeValue consumes a "value" per RFC 2045, where a value is
   273  // either a 'token' or a 'quoted-string'.  On success, consumeValue
   274  // returns the value consumed (and de-quoted/escaped, if a
   275  // quoted-string) and the rest of the string. On failure, returns
   276  // ("", v).
   277  func consumeValue(v string) (value, rest string) {
   278  	if v == "" {
   279  		return
   280  	}
   281  	if v[0] != '"' {
   282  		return consumeToken(v)
   283  	}
   284  
   285  	// parse a quoted-string
   286  	buffer := new(strings.Builder)
   287  	for i := 1; i < len(v); i++ {
   288  		r := v[i]
   289  		if r == '"' {
   290  			return buffer.String(), v[i+1:]
   291  		}
   292  		// When MSIE sends a full file path (in "intranet mode"), it does not
   293  		// escape backslashes: "C:\dev\go\foo.txt", not "C:\\dev\\go\\foo.txt".
   294  		//
   295  		// No known MIME generators emit unnecessary backslash escapes
   296  		// for simple token characters like numbers and letters.
   297  		//
   298  		// If we see an unnecessary backslash escape, assume it is from MSIE
   299  		// and intended as a literal backslash. This makes Go servers deal better
   300  		// with MSIE without affecting the way they handle conforming MIME
   301  		// generators.
   302  		if r == '\\' && i+1 < len(v) && isTSpecial(rune(v[i+1])) {
   303  			buffer.WriteByte(v[i+1])
   304  			i++
   305  			continue
   306  		}
   307  		if r == '\r' || r == '\n' {
   308  			return "", v
   309  		}
   310  		buffer.WriteByte(v[i])
   311  	}
   312  	// Did not find end quote.
   313  	return "", v
   314  }
   315  
   316  func consumeMediaParam(v string) (param, value, rest string) {
   317  	rest = strings.TrimLeftFunc(v, unicode.IsSpace)
   318  	if !strings.HasPrefix(rest, ";") {
   319  		return "", "", v
   320  	}
   321  
   322  	rest = rest[1:] // consume semicolon
   323  	rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
   324  	param, rest = consumeToken(rest)
   325  	param = strings.ToLower(param)
   326  	if param == "" {
   327  		return "", "", v
   328  	}
   329  
   330  	rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
   331  	if !strings.HasPrefix(rest, "=") {
   332  		return "", "", v
   333  	}
   334  	rest = rest[1:] // consume equals sign
   335  	rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
   336  	value, rest2 := consumeValue(rest)
   337  	if value == "" && rest2 == rest {
   338  		return "", "", v
   339  	}
   340  	rest = rest2
   341  	return param, value, rest
   342  }
   343  
   344  func percentHexUnescape(s string) (string, error) {
   345  	// Count %, check that they're well-formed.
   346  	percents := 0
   347  	for i := 0; i < len(s); {
   348  		if s[i] != '%' {
   349  			i++
   350  			continue
   351  		}
   352  		percents++
   353  		if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
   354  			s = s[i:]
   355  			if len(s) > 3 {
   356  				s = s[0:3]
   357  			}
   358  			return "", fmt.Errorf("mime: bogus characters after %%: %q", s)
   359  		}
   360  		i += 3
   361  	}
   362  	if percents == 0 {
   363  		return s, nil
   364  	}
   365  
   366  	t := make([]byte, len(s)-2*percents)
   367  	j := 0
   368  	for i := 0; i < len(s); {
   369  		switch s[i] {
   370  		case '%':
   371  			t[j] = unhex(s[i+1])<<4 | unhex(s[i+2])
   372  			j++
   373  			i += 3
   374  		default:
   375  			t[j] = s[i]
   376  			j++
   377  			i++
   378  		}
   379  	}
   380  	return string(t), nil
   381  }
   382  
   383  func ishex(c byte) bool {
   384  	switch {
   385  	case '0' <= c && c <= '9':
   386  		return true
   387  	case 'a' <= c && c <= 'f':
   388  		return true
   389  	case 'A' <= c && c <= 'F':
   390  		return true
   391  	}
   392  	return false
   393  }
   394  
   395  func unhex(c byte) byte {
   396  	switch {
   397  	case '0' <= c && c <= '9':
   398  		return c - '0'
   399  	case 'a' <= c && c <= 'f':
   400  		return c - 'a' + 10
   401  	case 'A' <= c && c <= 'F':
   402  		return c - 'A' + 10
   403  	}
   404  	return 0
   405  }
   406  

View as plain text