| // Copyright 2011 The Go Authors. All rights reserved. | |
| // Use of this source code is governed by a BSD-style | |
| // license that can be found in the LICENSE file. | |
| package template | |
| import ( | |
| "bytes" | |
| "fmt" | |
| "strings" | |
| "unicode" | |
| "unicode/utf8" | |
| ) | |
| // endsWithCSSKeyword reports whether b ends with an ident that | |
| // case-insensitively matches the lower-case kw. | |
| func endsWithCSSKeyword(b []byte, kw string) bool { | |
| i := len(b) - len(kw) | |
| if i < 0 { | |
| // Too short. | |
| return false | |
| } | |
| if i != 0 { | |
| r, _ := utf8.DecodeLastRune(b[:i]) | |
| if isCSSNmchar(r) { | |
| // Too long. | |
| return false | |
| } | |
| } | |
| // Many CSS keywords, such as "!important" can have characters encoded, | |
| // but the URI production does not allow that according to | |
| // https://www.w3.org/TR/css3-syntax/#TOK-URI | |
| // This does not attempt to recognize encoded keywords. For example, | |
| // given "\75\72\6c" and "url" this return false. | |
| return string(bytes.ToLower(b[i:])) == kw | |
| } | |
// isCSSNmchar reports whether r may appear anywhere inside a CSS identifier.
//
// It follows the CSS3 nmchar production, except that multi-rune escape
// sequences are not considered.
// https://www.w3.org/TR/css3-syntax/#SUBTOK-nmchar
func isCSSNmchar(r rune) bool {
	switch {
	case 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z', '0' <= r && r <= '9':
		// ASCII letters and digits.
		return true
	case r == '-', r == '_':
		return true
	case 0x80 <= r && r <= 0xd7ff,
		0xe000 <= r && r <= 0xfffd,
		0x10000 <= r && r <= 0x10ffff:
		// Non-ASCII ranges accepted by the production.
		return true
	}
	return false
}
| // decodeCSS decodes CSS3 escapes given a sequence of stringchars. | |
| // If there is no change, it returns the input, otherwise it returns a slice | |
| // backed by a new array. | |
| // https://www.w3.org/TR/css3-syntax/#SUBTOK-stringchar defines stringchar. | |
| func decodeCSS(s []byte) []byte { | |
| i := bytes.IndexByte(s, '\\') | |
| if i == -1 { | |
| return s | |
| } | |
| // The UTF-8 sequence for a codepoint is never longer than 1 + the | |
| // number hex digits need to represent that codepoint, so len(s) is an | |
| // upper bound on the output length. | |
| b := make([]byte, 0, len(s)) | |
| for len(s) != 0 { | |
| i := bytes.IndexByte(s, '\\') | |
| if i == -1 { | |
| i = len(s) | |
| } | |
| b, s = append(b, s[:i]...), s[i:] | |
| if len(s) < 2 { | |
| break | |
| } | |
| // https://www.w3.org/TR/css3-syntax/#SUBTOK-escape | |
| // escape ::= unicode | '\' [#x20-#x7E#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF] | |
| if isHex(s[1]) { | |
| // https://www.w3.org/TR/css3-syntax/#SUBTOK-unicode | |
| // unicode ::= '\' [0-9a-fA-F]{1,6} wc? | |
| j := 2 | |
| for j < len(s) && j < 7 && isHex(s[j]) { | |
| j++ | |
| } | |
| r := hexDecode(s[1:j]) | |
| if r > unicode.MaxRune { | |
| r, j = r/16, j-1 | |
| } | |
| n := utf8.EncodeRune(b[len(b):cap(b)], r) | |
| // The optional space at the end allows a hex | |
| // sequence to be followed by a literal hex. | |
| // string(decodeCSS([]byte(`\A B`))) == "\nB" | |
| b, s = b[:len(b)+n], skipCSSSpace(s[j:]) | |
| } else { | |
| // `\\` decodes to `\` and `\"` to `"`. | |
| _, n := utf8.DecodeRune(s[1:]) | |
| b, s = append(b, s[1:1+n]...), s[1+n:] | |
| } | |
| } | |
| return b | |
| } | |
// isHex reports whether c is an ASCII hexadecimal digit (0-9, a-f or A-F).
func isHex(c byte) bool {
	switch {
	case '0' <= c && c <= '9':
		return true
	case 'a' <= c && c <= 'f':
		return true
	case 'A' <= c && c <= 'F':
		return true
	}
	return false
}
// hexDecode interprets s as a short run of hexadecimal digits and returns
// the value it denotes, e.g. "10" -> 16. An empty s yields 0.
//
// It panics if s contains a byte that is not a hex digit.
func hexDecode(s []byte) rune {
	var val rune
	for _, c := range s {
		var digit byte
		switch {
		case '0' <= c && c <= '9':
			digit = c - '0'
		case 'a' <= c && c <= 'f':
			digit = c - 'a' + 10
		case 'A' <= c && c <= 'F':
			digit = c - 'A' + 10
		default:
			panic(fmt.Sprintf("Bad hex digit in %q", s))
		}
		// Make room for the next nibble and merge it in.
		val = val<<4 | rune(digit)
	}
	return val
}
// skipCSSSpace returns the suffix of c that follows at most one leading CSS
// whitespace sequence. If c does not begin with whitespace it is returned
// as is.
func skipCSSSpace(c []byte) []byte {
	skip := 0
	if len(c) > 0 {
		// wc ::= #x9 | #xA | #xC | #xD | #x20
		switch c[0] {
		case '\r':
			// Unlike CSS3's wc production, CRLF is consumed as one unit.
			// The production looks like a spec error: wc lists every
			// single-byte sequence in nl (newline) but omits CRLF.
			skip = 1
			if len(c) > 1 && c[1] == '\n' {
				skip = 2
			}
		case ' ', '\t', '\n', '\f':
			skip = 1
		}
	}
	return c[skip:]
}
// isCSSSpace reports whether b is one of the CSS whitespace characters of
// the wc production: tab, line feed, form feed, carriage return or space.
func isCSSSpace(b byte) bool {
	return b == '\t' || b == '\n' || b == '\f' || b == '\r' || b == ' '
}
| // cssEscaper escapes HTML and CSS special characters using \<hex>+ escapes. | |
| func cssEscaper(args ...any) string { | |
| s, _ := stringify(args...) | |
| var b strings.Builder | |
| r, w, written := rune(0), 0, 0 | |
| for i := 0; i < len(s); i += w { | |
| // See comment in htmlEscaper. | |
| r, w = utf8.DecodeRuneInString(s[i:]) | |
| var repl string | |
| switch { | |
| case int(r) < len(cssReplacementTable) && cssReplacementTable[r] != "": | |
| repl = cssReplacementTable[r] | |
| default: | |
| continue | |
| } | |
| if written == 0 { | |
| b.Grow(len(s)) | |
| } | |
| b.WriteString(s[written:i]) | |
| b.WriteString(repl) | |
| written = i + w | |
| if repl != `\\` && (written == len(s) || isHex(s[written]) || isCSSSpace(s[written])) { | |
| b.WriteByte(' ') | |
| } | |
| } | |
| if written == 0 { | |
| return s | |
| } | |
| b.WriteString(s[written:]) | |
| return b.String() | |
| } | |
// cssReplacementTable maps a rune, used as the slice index, to the CSS
// escape that cssEscaper emits in its place. Indices without an entry hold
// the empty string, meaning the rune is left alone.
var cssReplacementTable = []string{
	// NUL and whitespace control characters.
	'\x00': `\0`,
	'\t':   `\9`,
	'\n':   `\a`,
	'\f':   `\c`,
	'\r':   `\d`,

	// HTML specials are written as hex escapes so that the output can be
	// embedded in HTML attributes without further encoding.
	'"':  `\22`,
	'&':  `\26`,
	'\'': `\27`,
	'(':  `\28`,
	')':  `\29`,
	'+':  `\2b`,
	'/':  `\2f`,
	':':  `\3a`,
	';':  `\3b`,
	'<':  `\3c`,
	'>':  `\3e`,

	// The backslash is the only entry that is not a hex escape.
	'\\': `\\`,

	'{': `\7b`,
	'}': `\7d`,
}
// Lower-case byte sequences that cssValueFilter searches for in the
// lower-cased identifier characters of a value; a value containing either
// one is rejected.
var (
	expressionBytes = []byte("expression")
	mozBindingBytes = []byte("mozbinding")
)
| // cssValueFilter allows innocuous CSS values in the output including CSS | |
| // quantities (10px or 25%), ID or class literals (#foo, .bar), keyword values | |
| // (inherit, blue), and colors (#888). | |
| // It filters out unsafe values, such as those that affect token boundaries, | |
| // and anything that might execute scripts. | |
| func cssValueFilter(args ...any) string { | |
| s, t := stringify(args...) | |
| if t == contentTypeCSS { | |
| return s | |
| } | |
| b, id := decodeCSS([]byte(s)), make([]byte, 0, 64) | |
| // CSS3 error handling is specified as honoring string boundaries per | |
| // https://www.w3.org/TR/css3-syntax/#error-handling : | |
| // Malformed declarations. User agents must handle unexpected | |
| // tokens encountered while parsing a declaration by reading until | |
| // the end of the declaration, while observing the rules for | |
| // matching pairs of (), [], {}, "", and '', and correctly handling | |
| // escapes. For example, a malformed declaration may be missing a | |
| // property, colon (:) or value. | |
| // So we need to make sure that values do not have mismatched bracket | |
| // or quote characters to prevent the browser from restarting parsing | |
| // inside a string that might embed JavaScript source. | |
| for i, c := range b { | |
| switch c { | |
| case 0, '"', '\'', '(', ')', '/', ';', '@', '[', '\\', ']', '`', '{', '}', '<', '>': | |
| return filterFailsafe | |
| case '-': | |
| // Disallow <!-- or -->. | |
| // -- should not appear in valid identifiers. | |
| if i != 0 && b[i-1] == '-' { | |
| return filterFailsafe | |
| } | |
| default: | |
| if c < utf8.RuneSelf && isCSSNmchar(rune(c)) { | |
| id = append(id, c) | |
| } | |
| } | |
| } | |
| id = bytes.ToLower(id) | |
| if bytes.Contains(id, expressionBytes) || bytes.Contains(id, mozBindingBytes) { | |
| return filterFailsafe | |
| } | |
| return string(b) | |
| } | |