/**
 * Replacement table for characters that must be escaped inside a double-quoted
 * attribute value. In that parser state only `&` and `"` hold special meaning:
 * `&` starts a character reference and `"` terminates the value.
 * @see https://html.spec.whatwg.org/multipage/parsing.html#attribute-value-(double-quoted)-state
 * @type {Record<string, string>}
 */
const escape_html_attr_dict = {
	'&': '&amp;',
	'"': '&quot;'
};
/**
 * Global pattern that finds, in a single pass, everything `escape_html_attr`
 * needs to look at: the special characters listed in `escape_html_attr_dict`,
 * unpaired surrogates, and valid surrogate pairs (matched only so that they
 * can be recognised and left untouched).
 */
const escape_html_attr_regex = new RegExp(
	// special characters
	`[${Object.keys(escape_html_attr_dict).join('')}]|` +
		// high surrogate without paired low surrogate
		'[\\ud800-\\udbff](?![\\udc00-\\udfff])|' +
		// a valid surrogate pair, the only match with 2 code units
		// we match it so that we can match unpaired low surrogates in the same pass
		// TODO: use lookbehind assertions once they are widely supported: (?<![\ud800-\udbff])[\udc00-\udfff]
		'[\\ud800-\\udbff][\\udc00-\\udfff]|' +
		// unpaired low surrogate (see previous match)
		'[\\udc00-\\udfff]',
	'g'
);
/**
 * Formats a string to be used as an attribute's value in raw HTML.
 *
 * It escapes unpaired surrogates (which are allowed in js strings but invalid in HTML), escapes
 * characters that are special in attributes, and surrounds the whole string in double-quotes.
 *
 * @param {string} str
 * @returns {string} Escaped string surrounded by double-quotes.
 * @example const html = `<tag data-value=${escape_html_attr('value')}>...</tag>`;
 */
export function escape_html_attr(str) {
	const escaped_str = str.replace(escape_html_attr_regex, (match) => {
		if (match.length === 2) {
			// valid surrogate pair: the only alternative that spans two code units, keep as-is
			return match;
		}

		// Single code unit: either a special character with a named replacement in the
		// dictionary, or an unpaired surrogate, which is emitted as a numeric character reference.
		return escape_html_attr_dict[match] ?? `&#${match.charCodeAt(0)};`;
	});

	return `"${escaped_str}"`;
}