Upload folder using huggingface_hub

e36aeda verified about 1 month ago

14.6 kB

	// Copyright 2011 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package template

	import (
	"bytes"
	"encoding/json"
	"fmt"
	"reflect"
	"regexp"
	"strings"
	"unicode/utf8"
	)

	// jsWhitespace lists every character matched by the JavaScript regular
	// expression character class \s. It is used as a trim cutset.
	// See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions/Character_classes.
	const jsWhitespace = "\f\n\r\t\v" + // ASCII control whitespace
		"\u0020\u00a0\u1680" + // space, no-break space, ogham space mark
		"\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a" + // en quad through hair space
		"\u2028\u2029" + // line separator, paragraph separator
		"\u202f\u205f\u3000\ufeff" // narrow no-break, medium mathematical, ideographic space, byte order mark

	// nextJSCtx returns the context that determines whether a slash after the
	// given run of tokens starts a regular expression instead of a division
	// operator: / or /=.
	//
	// This assumes that the token run does not include any string tokens, comment
	// tokens, regular expression literal tokens, or division operators.
	//
	// This fails on some valid but nonsensical JavaScript programs like
	// "x = ++/foo/i" which is quite different than "x++/foo/i", but is not known to
	// fail on any known useful programs. It is based on the draft
	// JavaScript 2.0 lexical grammar and requires one token of lookbehind:
	// https://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html
	func nextJSCtx(s []byte, preceding jsCtx) jsCtx {
	// Trim all JS whitespace characters
	s = bytes.TrimRight(s, jsWhitespace)
	if len(s) == 0 {
	return preceding
	}

	// All cases below are in the single-byte UTF-8 group.
	switch c, n := s[len(s)-1], len(s); c {
	case '+', '-':
	// ++ and -- are not regexp preceders, but + and - are whether
	// they are used as infix or prefix operators.
	start := n - 1
	// Count the number of adjacent dashes or pluses.
	for start > 0 && s[start-1] == c {
	start--
	}
	if (n-start)&1 == 1 {
	// Reached for trailing minus signs since "---" is the
	// same as "-- -".
	return jsCtxRegexp
	}
	return jsCtxDivOp
	case '.':
	// Handle "42."
	if n != 1 && '0' <= s[n-2] && s[n-2] <= '9' {
	return jsCtxDivOp
	}
	return jsCtxRegexp
	// Suffixes for all punctuators from section 7.7 of the language spec
	// that only end binary operators not handled above.
	case ',', '<', '>', '=', '*', '%', '&', '\|', '^', '?':
	return jsCtxRegexp
	// Suffixes for all punctuators from section 7.7 of the language spec
	// that are prefix operators not handled above.
	case '!', '~':
	return jsCtxRegexp
	// Matches all the punctuators from section 7.7 of the language spec
	// that are open brackets not handled above.
	case '(', '[':
	return jsCtxRegexp
	// Matches all the punctuators from section 7.7 of the language spec
	// that precede expression starts.
	case ':', ';', '{':
	return jsCtxRegexp
	// CAVEAT: the close punctuators ('}', ']', ')') precede div ops and
	// are handled in the default except for '}' which can precede a
	// division op as in
	// ({ valueOf: function () { return 42 } } / 2
	// which is valid, but, in practice, developers don't divide object
	// literals, so our heuristic works well for code like
	// function () { ... } /foo/.test(x) && sideEffect();
	// The ')' punctuator can precede a regular expression as in
	// if (b) /foo/.test(x) && ...
	// but this is much less likely than
	// (a + b) / c
	case '}':
	return jsCtxRegexp
	default:
	// Look for an IdentifierName and see if it is a keyword that
	// can precede a regular expression.
	j := n
	for j > 0 && isJSIdentPart(rune(s[j-1])) {
	j--
	}
	if regexpPrecederKeywords[string(s[j:])] {
	return jsCtxRegexp
	}
	}
	// Otherwise is a punctuator not listed above, or
	// a string which precedes a div op, or an identifier
	// which precedes a div op.
	return jsCtxDivOp
	}

	// regexpPrecederKeywords is the set of reserved JS keywords after which a
	// slash starts a regular expression literal in JS source. A lookup of any
	// word not listed here yields false.
	var regexpPrecederKeywords = map[string]bool{
		// Statement keywords.
		"break":    true,
		"case":     true,
		"continue": true,
		"do":       true,
		"else":     true,
		"finally":  true,
		"return":   true,
		"throw":    true,
		"try":      true,

		// Operator keywords.
		"delete":     true,
		"in":         true,
		"instanceof": true,
		"typeof":     true,
		"void":       true,
	}

	var jsonMarshalType = reflect.TypeFor[json.Marshaler]()

	// indirectToJSONMarshaler returns the value, after dereferencing as many times
	// as necessary to reach the base type (or nil) or an implementation of json.Marshal.
	func indirectToJSONMarshaler(a any) any {
	// text/template now supports passing untyped nil as a func call
	// argument, so we must support it. Otherwise we'd panic below, as one
	// cannot call the Type or Interface methods on an invalid
	// reflect.Value. See golang.org/issue/18716.
	if a == nil {
	return nil
	}

	v := reflect.ValueOf(a)
	for !v.Type().Implements(jsonMarshalType) && v.Kind() == reflect.Pointer && !v.IsNil() {
	v = v.Elem()
	}
	return v.Interface()
	}

	var scriptTagRe = regexp.MustCompile("(?i)<(/?)script")

	// jsValEscaper escapes its inputs to a JS Expression (section 11.14) that has
	// neither side-effects nor free variables outside (NaN, Infinity).
	func jsValEscaper(args ...any) string {
	var a any
	if len(args) == 1 {
	a = indirectToJSONMarshaler(args[0])
	switch t := a.(type) {
	case JS:
	return string(t)
	case JSStr:
	// TODO: normalize quotes.
	return `"` + string(t) + `"`
	case json.Marshaler:
	// Do not treat as a Stringer.
	case fmt.Stringer:
	a = t.String()
	}
	} else {
	for i, arg := range args {
	args[i] = indirectToJSONMarshaler(arg)
	}
	a = fmt.Sprint(args...)
	}
	// TODO: detect cycles before calling Marshal which loops infinitely on
	// cyclic data. This may be an unacceptable DoS risk.
	b, err := json.Marshal(a)
	if err != nil {
	// While the standard JSON marshaler does not include user controlled
	// information in the error message, if a type has a MarshalJSON method,
	// the content of the error message is not guaranteed. Since we insert
	// the error into the template, as part of a comment, we attempt to
	// prevent the error from either terminating the comment, or the script
	// block itself.
	//
	// In particular we:
	// * replace "/" comment end tokens with " /", which does not
	// terminate the comment
	// * replace "<script" and "</script" with "\x3Cscript" and "\x3C/script"
	// (case insensitively), and "<!--" with "\x3C!--", which prevents
	// confusing script block termination semantics
	//
	// We also put a space before the comment so that if it is flush against
	// a division operator it is not turned into a line comment:
	// x/{{y}}
	// turning into
	// x//* error marshaling y:
	// second line of error message */null
	errStr := err.Error()
	errStr = string(scriptTagRe.ReplaceAll([]byte(errStr), []byte(`\x3C${1}script`)))
	errStr = strings.ReplaceAll(errStr, "/", " /")
	errStr = strings.ReplaceAll(errStr, "<!--", `\x3C!--`)
	return fmt.Sprintf(" /* %s */null ", errStr)
	}

	// TODO: maybe post-process output to prevent it from containing
	// "<!--", "-->", "<![CDATA[", "]]>", or "</script"
	// in case custom marshalers produce output containing those.
	// Note: Do not use \x escaping to save bytes because it is not JSON compatible and this escaper
	// supports ld+json content-type.
	if len(b) == 0 {
	// In, `x=y/{{.}}*z` a json.Marshaler that produces "" should
	// not cause the output `x=y/*z`.
	return " null "
	}
	first, _ := utf8.DecodeRune(b)
	last, _ := utf8.DecodeLastRune(b)
	var buf strings.Builder
	// Prevent IdentifierNames and NumericLiterals from running into
	// keywords: in, instanceof, typeof, void
	pad := isJSIdentPart(first) \|\| isJSIdentPart(last)
	if pad {
	buf.WriteByte(' ')
	}
	written := 0
	// Make sure that json.Marshal escapes codepoints U+2028 & U+2029
	// so it falls within the subset of JSON which is valid JS.
	for i := 0; i < len(b); {
	rune, n := utf8.DecodeRune(b[i:])
	repl := ""
	if rune == 0x2028 {
	repl = `\u2028`
	} else if rune == 0x2029 {
	repl = `\u2029`
	}
	if repl != "" {
	buf.Write(b[written:i])
	buf.WriteString(repl)
	written = i + n
	}
	i += n
	}
	if buf.Len() != 0 {
	buf.Write(b[written:])
	if pad {
	buf.WriteByte(' ')
	}
	return buf.String()
	}
	return string(b)
	}

	// jsStrEscaper produces a string that can be included between quotes in
	// JavaScript source, in JavaScript embedded in an HTML5 <script> element,
	// or in an HTML5 event handler attribute such as onclick.
	//
	// Input that stringify reports as contentTypeJSStr is escaped with
	// jsStrNormReplacementTable; everything else is escaped with
	// jsStrReplacementTable.
	func jsStrEscaper(args ...any) string {
		text, kind := stringify(args...)
		table := jsStrReplacementTable
		if kind == contentTypeJSStr {
			table = jsStrNormReplacementTable
		}
		return replace(text, table)
	}

	// jsTmplLitEscaper stringifies its arguments and escapes the result with
	// jsBqStrReplacementTable, the table that also covers the special
	// characters of JS template literals.
	func jsTmplLitEscaper(args ...any) string {
		text, _ := stringify(args...)
		escaped := replace(text, jsBqStrReplacementTable)
		return escaped
	}

	// jsRegexpEscaper behaves like jsStrEscaper but escapes regular expression
	// specials so the result is treated literally when included in a regular
	// expression literal. /foo{{.X}}bar/ matches the string "foo" followed by
	// the literal text of {{.X}} followed by the string "bar".
	//
	// An empty result is replaced by "(?:)".
	func jsRegexpEscaper(args ...any) string {
		raw, _ := stringify(args...)
		if escaped := replace(raw, jsRegexpReplacementTable); escaped != "" {
			return escaped
		}
		// /{{.X}}/ should not produce a line comment when .X == "".
		return "(?:)"
	}

	// replace returns s with each rune r rewritten as follows:
	//   - r below len(lowUnicodeReplacementTable) becomes
	//     lowUnicodeReplacementTable[r];
	//   - otherwise, r below len(replacementTable) with a non-empty
	//     replacementTable[r] becomes that entry;
	//   - otherwise, U+2028 and U+2029 become the raw strings `\u2028` and
	//     `\u2029`;
	//   - any other rune is copied through unchanged.
	//
	// When nothing is rewritten, s itself is returned.
	func replace(s string, replacementTable []string) string {
		var out strings.Builder
		flushed := 0 // s[:flushed] has already been emitted, escaped, into out
		for pos, size := 0, 0; pos < len(s); pos += size {
			// See comment in htmlEscaper.
			var r rune
			r, size = utf8.DecodeRuneInString(s[pos:])
			code := int(r)

			var esc string
			if code < len(lowUnicodeReplacementTable) {
				esc = lowUnicodeReplacementTable[code]
			} else if code < len(replacementTable) && replacementTable[code] != "" {
				esc = replacementTable[code]
			} else if r == '\u2028' {
				esc = `\u2028`
			} else if r == '\u2029' {
				esc = `\u2029`
			} else {
				// No escape applies; the rune is copied with a later flush.
				continue
			}

			if flushed == 0 {
				// First rewrite: reserve room for at least the input.
				out.Grow(len(s))
			}
			out.WriteString(s[flushed:pos])
			out.WriteString(esc)
			flushed = pos + size
		}
		if flushed == 0 {
			return s
		}
		out.WriteString(s[flushed:])
		return out.String()
	}

	// lowUnicodeReplacementTable holds the escape for each code point from
	// U+0000 through U+001F, indexed by code point.
	var lowUnicodeReplacementTable = []string{
		// 0x00 - 0x07
		`\u0000`, `\u0001`, `\u0002`, `\u0003`, `\u0004`, `\u0005`, `\u0006`, `\u0007`,
		// 0x08 - 0x0f; 0x0b is spelled out because "\v" == "v" on IE 6.
		`\u0008`, `\t`, `\n`, `\u000b`, `\f`, `\r`, `\u000e`, `\u000f`,
		// 0x10 - 0x17
		`\u0010`, `\u0011`, `\u0012`, `\u0013`, `\u0014`, `\u0015`, `\u0016`, `\u0017`,
		// 0x18 - 0x1f
		`\u0018`, `\u0019`, `\u001a`, `\u001b`, `\u001c`, `\u001d`, `\u001e`, `\u001f`,
	}

	// jsStrReplacementTable maps a code point to the escape used for it inside
	// a quoted JS string. Code points with no entry hold the empty string.
	var jsStrReplacementTable = []string{
		// Control characters.
		0:    `\u0000`,
		'\t': `\t`,
		'\n': `\n`,
		'\v': `\u000b`, // "\v" == "v" on IE 6.
		'\f': `\f`,
		'\r': `\r`,

		// HTML specials and string delimiters, in code point order. They are
		// encoded as hex so the output can be embedded in HTML attributes
		// without further encoding.
		'"':  `\u0022`,
		'&':  `\u0026`,
		'\'': `\u0027`,
		'+':  `\u002b`,
		'/':  `\/`,
		'<':  `\u003c`,
		'>':  `\u003e`,
		'\\': `\\`,
		'`':  `\u0060`,
	}

	// jsBqStrReplacementTable is like jsStrReplacementTable except it also contains
	// the special characters for JS template literals: $, {, and }.
	// Code points with no entry hold the empty string.
	var jsBqStrReplacementTable = []string{
		// Control characters.
		0:    `\u0000`,
		'\t': `\t`,
		'\n': `\n`,
		'\v': `\u000b`, // "\v" == "v" on IE 6.
		'\f': `\f`,
		'\r': `\r`,

		// HTML specials and string delimiters, in code point order. They are
		// encoded as hex so the output can be embedded in HTML attributes
		// without further encoding.
		'"':  `\u0022`,
		'&':  `\u0026`,
		'\'': `\u0027`,
		'+':  `\u002b`,
		'/':  `\/`,
		'<':  `\u003c`,
		'>':  `\u003e`,
		'\\': `\\`,
		'`':  `\u0060`,

		// Template literal specials.
		'$': `\u0024`,
		'{': `\u007b`,
		'}': `\u007d`,
	}

	// jsStrNormReplacementTable is like jsStrReplacementTable but does not
	// overencode existing escapes since this table has no entry for `\`.
	// Code points with no entry hold the empty string.
	var jsStrNormReplacementTable = []string{
		// Control characters.
		0:    `\u0000`,
		'\t': `\t`,
		'\n': `\n`,
		'\v': `\u000b`, // "\v" == "v" on IE 6.
		'\f': `\f`,
		'\r': `\r`,

		// HTML specials and string delimiters, in code point order. They are
		// encoded as hex so the output can be embedded in HTML attributes
		// without further encoding.
		'"':  `\u0022`,
		'&':  `\u0026`,
		'\'': `\u0027`,
		'+':  `\u002b`,
		'/':  `\/`,
		'<':  `\u003c`,
		'>':  `\u003e`,
		'`':  `\u0060`,
	}
	// jsRegexpReplacementTable maps a code point to the escape used for it
	// inside a JS regular expression literal. Besides the control characters
	// and HTML specials, it backslash-escapes the regular expression specials
	// so that they match literally. Code points with no entry hold the empty
	// string.
	var jsRegexpReplacementTable = []string{
		0:    `\u0000`,
		'\t': `\t`,
		'\n': `\n`,
		'\v': `\u000b`, // "\v" == "v" on IE 6.
		'\f': `\f`,
		'\r': `\r`,
		// Encode HTML specials as hex so the output can be embedded
		// in HTML attributes without further encoding.
		'"':  `\u0022`,
		'$':  `\$`,
		'&':  `\u0026`,
		'\'': `\u0027`,
		'(':  `\(`,
		')':  `\)`,
		'*':  `\*`,
		'+':  `\u002b`,
		'-':  `\-`,
		'.':  `\.`,
		'/':  `\/`,
		'<':  `\u003c`,
		'>':  `\u003e`,
		'?':  `\?`,
		'[':  `\[`,
		'\\': `\\`,
		']':  `\]`,
		'^':  `\^`,
		'{':  `\{`,
		'|':  `\|`,
		'}':  `\}`,
	}

	// isJSIdentPart reports whether the given rune is a JS identifier part.
	// Only '$', '_', the ASCII digits and the ASCII letters are recognized: it
	// does not handle all the non-Latin letters, joiners, and combining marks,
	// but it does handle every codepoint that can occur in a numeric literal or
	// a keyword.
	func isJSIdentPart(r rune) bool {
		isDigit := '0' <= r && r <= '9'
		isUpper := 'A' <= r && r <= 'Z'
		isLower := 'a' <= r && r <= 'z'
		return isDigit || isUpper || isLower || r == '$' || r == '_'
	}

	// isJSType reports whether the given MIME type should be considered JavaScript.
	//
	// It is used to determine whether a script tag with a type attribute is a javascript container.
	//
	// Anything from the first ";" onward is discarded, and the remainder is
	// compared case-insensitively with surrounding white space removed.
	func isJSType(mimeType string) bool {
		// per
		//   https://www.w3.org/TR/html5/scripting-1.html#attr-script-type
		//   https://tools.ietf.org/html/rfc7231#section-3.1.1
		//   https://tools.ietf.org/html/rfc4329#section-3
		//   https://www.ietf.org/rfc/rfc4627.txt
		essence, _, _ := strings.Cut(mimeType, ";") // discard parameters
		switch strings.TrimSpace(strings.ToLower(essence)) {
		case "module":
			return true
		case "application/ecmascript",
			"application/javascript",
			"application/json",
			"application/ld+json",
			"application/x-ecmascript",
			"application/x-javascript":
			return true
		case "text/ecmascript",
			"text/javascript",
			"text/javascript1.0",
			"text/javascript1.1",
			"text/javascript1.2",
			"text/javascript1.3",
			"text/javascript1.4",
			"text/javascript1.5",
			"text/jscript",
			"text/livescript",
			"text/x-ecmascript",
			"text/x-javascript":
			return true
		}
		return false
	}