Source file src/html/template/url.go
1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package template 6 7 import ( 8 "fmt" 9 "strings" 10 ) 11 12 // urlFilter returns its input unless it contains an unsafe scheme in which 13 // case it defangs the entire URL. 14 // 15 // Schemes that cause unintended side effects that are irreversible without user 16 // interaction are considered unsafe. For example, clicking on a "javascript:" 17 // link can immediately trigger JavaScript code execution. 18 // 19 // This filter conservatively assumes that all schemes other than the following 20 // are unsafe: 21 // - http: Navigates to a new website, and may open a new window or tab. 22 // These side effects can be reversed by navigating back to the 23 // previous website, or closing the window or tab. No irreversible 24 // changes will take place without further user interaction with 25 // the new website. 26 // - https: Same as http. 27 // - mailto: Opens an email program and starts a new draft. This side effect 28 // is not irreversible until the user explicitly clicks send; it 29 // can be undone by closing the email program. 30 // 31 // To allow URLs containing other schemes to bypass this filter, developers must 32 // explicitly indicate that such a URL is expected and safe by encapsulating it 33 // in a template.URL value. 34 func urlFilter(args ...any) string { 35 s, t := stringify(args...) 36 if t == contentTypeURL { 37 return s 38 } 39 if !isSafeURL(s) { 40 return "#" + filterFailsafe 41 } 42 return s 43 } 44 45 // isSafeURL is true if s is a relative URL or if URL has a protocol in 46 // (http, https, mailto). 47 func isSafeURL(s string) bool { 48 if protocol, _, ok := strings.Cut(s, ":"); ok && !strings.Contains(protocol, "/") { 49 if !strings.EqualFold(protocol, "http") && !strings.EqualFold(protocol, "https") && !strings.EqualFold(protocol, "mailto") { 50 return false 51 } 52 } 53 return true 54 } 55 56 // urlEscaper produces an output that can be embedded in a URL query. 57 // The output can be embedded in an HTML attribute without further escaping. 58 func urlEscaper(args ...any) string { 59 return urlProcessor(false, args...) 60 } 61 62 // urlNormalizer normalizes URL content so it can be embedded in a quote-delimited 63 // string or parenthesis delimited url(...). 64 // The normalizer does not encode all HTML specials. Specifically, it does not 65 // encode '&' so correct embedding in an HTML attribute requires escaping of 66 // '&' to '&'. 67 func urlNormalizer(args ...any) string { 68 return urlProcessor(true, args...) 69 } 70 71 // urlProcessor normalizes (when norm is true) or escapes its input to produce 72 // a valid hierarchical or opaque URL part. 73 func urlProcessor(norm bool, args ...any) string { 74 s, t := stringify(args...) 75 if t == contentTypeURL { 76 norm = true 77 } 78 var b strings.Builder 79 if processURLOnto(s, norm, &b) { 80 return b.String() 81 } 82 return s 83 } 84 85 // processURLOnto appends a normalized URL corresponding to its input to b 86 // and reports whether the appended content differs from s. 87 func processURLOnto(s string, norm bool, b *strings.Builder) bool { 88 b.Grow(len(s) + 16) 89 written := 0 90 // The byte loop below assumes that all URLs use UTF-8 as the 91 // content-encoding. This is similar to the URI to IRI encoding scheme 92 // defined in section 3.1 of RFC 3987, and behaves the same as the 93 // EcmaScript builtin encodeURIComponent. 94 // It should not cause any misencoding of URLs in pages with 95 // Content-type: text/html;charset=UTF-8. 96 for i, n := 0, len(s); i < n; i++ { 97 c := s[i] 98 switch c { 99 // Single quote and parens are sub-delims in RFC 3986, but we 100 // escape them so the output can be embedded in single 101 // quoted attributes and unquoted CSS url(...) constructs. 102 // Single quotes are reserved in URLs, but are only used in 103 // the obsolete "mark" rule in an appendix in RFC 3986 104 // so can be safely encoded. 105 case '!', '#', '$', '&', '*', '+', ',', '/', ':', ';', '=', '?', '@', '[', ']': 106 if norm { 107 continue 108 } 109 // Unreserved according to RFC 3986 sec 2.3 110 // "For consistency, percent-encoded octets in the ranges of 111 // ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D), 112 // period (%2E), underscore (%5F), or tilde (%7E) should not be 113 // created by URI producers 114 case '-', '.', '_', '~': 115 continue 116 case '%': 117 // When normalizing do not re-encode valid escapes. 118 if norm && i+2 < len(s) && isHex(s[i+1]) && isHex(s[i+2]) { 119 continue 120 } 121 default: 122 // Unreserved according to RFC 3986 sec 2.3 123 if 'a' <= c && c <= 'z' { 124 continue 125 } 126 if 'A' <= c && c <= 'Z' { 127 continue 128 } 129 if '0' <= c && c <= '9' { 130 continue 131 } 132 } 133 b.WriteString(s[written:i]) 134 fmt.Fprintf(b, "%%%02x", c) 135 written = i + 1 136 } 137 b.WriteString(s[written:]) 138 return written != 0 139 } 140 141 // Filters and normalizes srcset values which are comma separated 142 // URLs followed by metadata. 143 func srcsetFilterAndEscaper(args ...any) string { 144 s, t := stringify(args...) 145 switch t { 146 case contentTypeSrcset: 147 return s 148 case contentTypeURL: 149 // Normalizing gets rid of all HTML whitespace 150 // which separate the image URL from its metadata. 151 var b strings.Builder 152 if processURLOnto(s, true, &b) { 153 s = b.String() 154 } 155 // Additionally, commas separate one source from another. 156 return strings.ReplaceAll(s, ",", "%2c") 157 } 158 159 var b strings.Builder 160 written := 0 161 for i := 0; i < len(s); i++ { 162 if s[i] == ',' { 163 filterSrcsetElement(s, written, i, &b) 164 b.WriteString(",") 165 written = i + 1 166 } 167 } 168 filterSrcsetElement(s, written, len(s), &b) 169 return b.String() 170 } 171 172 // Derived from https://play.golang.org/p/Dhmj7FORT5 173 const htmlSpaceAndASCIIAlnumBytes = "\x00\x36\x00\x00\x01\x00\xff\x03\xfe\xff\xff\x07\xfe\xff\xff\x07" 174 175 // isHTMLSpace is true iff c is a whitespace character per 176 // https://infra.spec.whatwg.org/#ascii-whitespace 177 func isHTMLSpace(c byte) bool { 178 return (c <= 0x20) && 0 != (htmlSpaceAndASCIIAlnumBytes[c>>3]&(1<<uint(c&0x7))) 179 } 180 181 func isHTMLSpaceOrASCIIAlnum(c byte) bool { 182 return (c < 0x80) && 0 != (htmlSpaceAndASCIIAlnumBytes[c>>3]&(1<<uint(c&0x7))) 183 } 184 185 func filterSrcsetElement(s string, left int, right int, b *strings.Builder) { 186 start := left 187 for start < right && isHTMLSpace(s[start]) { 188 start++ 189 } 190 end := right 191 for i := start; i < right; i++ { 192 if isHTMLSpace(s[i]) { 193 end = i 194 break 195 } 196 } 197 if url := s[start:end]; isSafeURL(url) { 198 // If image metadata is only spaces or alnums then 199 // we don't need to URL normalize it. 200 metadataOk := true 201 for i := end; i < right; i++ { 202 if !isHTMLSpaceOrASCIIAlnum(s[i]) { 203 metadataOk = false 204 break 205 } 206 } 207 if metadataOk { 208 b.WriteString(s[left:start]) 209 processURLOnto(url, true, b) 210 b.WriteString(s[end:right]) 211 return 212 } 213 } 214 b.WriteString("#") 215 b.WriteString(filterFailsafe) 216 } 217