| // Copyright 2025 The Go Authors. All rights reserved. | |
| // Use of this source code is governed by a BSD-style | |
| // license that can be found in the LICENSE file. | |
| //go:build ignore | |
| package main | |
| import ( | |
| "bytes" | |
| _ "embed" | |
| "fmt" | |
| "go/format" | |
| "io" | |
| "log" | |
| "maps" | |
| "os" | |
| "slices" | |
| "strconv" | |
| "strings" | |
| ) | |
// We embed this source file in the resulting code-generation program in order
// to extract the definitions of the encoding type and constants from it and
// include them in the generated file.
//
//go:embed gen_encoding_table.go
var genSource string

// filename is the path main writes the formatted output to. It is a relative
// path, so the file is created in the generator's working directory.
const filename = "encoding_table.go"
| func main() { | |
| var out bytes.Buffer | |
| fmt.Fprintln(&out, "// Code generated from gen_encoding_table.go using 'go generate'; DO NOT EDIT.") | |
| fmt.Fprintln(&out) | |
| fmt.Fprintln(&out, "// Copyright 2025 The Go Authors. All rights reserved.") | |
| fmt.Fprintln(&out, "// Use of this source code is governed by a BSD-style") | |
| fmt.Fprintln(&out, "// license that can be found in the LICENSE file.") | |
| fmt.Fprintln(&out) | |
| fmt.Fprintln(&out, "package url") | |
| fmt.Fprintln(&out) | |
| generateEnc(&out, genSource) | |
| generateTable(&out) | |
| formatted, err := format.Source(out.Bytes()) | |
| if err != nil { | |
| log.Fatal("format:", err) | |
| } | |
| err = os.WriteFile(filename, formatted, 0644) | |
| if err != nil { | |
| log.Fatal("WriteFile:", err) | |
| } | |
| } | |
| func generateEnc(w io.Writer, src string) { | |
| var writeLine bool | |
| for line := range strings.Lines(src) { | |
| if strings.HasPrefix(line, "// START encoding") { | |
| writeLine = true | |
| continue | |
| } | |
| if strings.HasPrefix(line, "// END encoding") { | |
| return | |
| } | |
| if writeLine { | |
| fmt.Fprint(w, line) | |
| } | |
| } | |
| } | |
| func generateTable(w io.Writer) { | |
| fmt.Fprintln(w, "var table = [256]encoding{") | |
| // Sort the encodings (in decreasing order) to guarantee a stable output. | |
| sortedEncs := slices.Sorted(maps.Keys(encNames)) | |
| slices.Reverse(sortedEncs) | |
| for i := range 256 { | |
| c := byte(i) | |
| var lineBuf bytes.Buffer | |
| // Write key to line buffer. | |
| lineBuf.WriteString(strconv.QuoteRune(rune(c))) | |
| lineBuf.WriteByte(':') | |
| // Write value to line buffer. | |
| blankVal := true | |
| if ishex(c) { | |
| // Set the hexChar bit if this char is hexadecimal. | |
| lineBuf.WriteString("hexChar") | |
| blankVal = false | |
| } | |
| for _, enc := range sortedEncs { | |
| if !shouldEscape(c, enc) { | |
| if !blankVal { | |
| lineBuf.WriteByte('|') | |
| } | |
| // Set this encoding mode's bit if this char should NOT be | |
| // escaped. | |
| name := encNames[enc] | |
| lineBuf.WriteString(name) | |
| blankVal = false | |
| } | |
| } | |
| if !blankVal { | |
| lineBuf.WriteString(",\n") | |
| w.Write(lineBuf.Bytes()) | |
| } | |
| } | |
| fmt.Fprintln(w, "}") | |
| } | |
// The declarations between the two marker comments below are what generateEnc
// copies out of the embedded source into the generated file, so any change
// made between the markers (comments included) ends up in the generator's
// output.

// START encoding (keep this marker comment in sync with generateEnc)
type encoding uint8
const (
	encodePath encoding = 1 << iota
	encodePathSegment
	encodeHost
	encodeZone
	encodeUserPassword
	encodeQueryComponent
	encodeFragment
	// hexChar is actually NOT an encoding mode, but there are only seven
	// encoding modes. We might as well abuse the otherwise unused most
	// significant bit in uint8 to indicate whether a character is
	// hexadecimal.
	hexChar
)
// END encoding (keep this marker comment in sync with generateEnc)
| // Keep this in sync with the definitions of encoding mode constants. | |
| var encNames = map[encoding]string{ | |
| encodePath: "encodePath", | |
| encodePathSegment: "encodePathSegment", | |
| encodeHost: "encodeHost", | |
| encodeZone: "encodeZone", | |
| encodeUserPassword: "encodeUserPassword", | |
| encodeQueryComponent: "encodeQueryComponent", | |
| encodeFragment: "encodeFragment", | |
| } | |
| // Return true if the specified character should be escaped when | |
| // appearing in a URL string, according to RFC 3986. | |
| // | |
| // Please be informed that for now shouldEscape does not check all | |
| // reserved characters correctly. See golang.org/issue/5684. | |
| func shouldEscape(c byte, mode encoding) bool { | |
| // §2.3 Unreserved characters (alphanum) | |
| if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' { | |
| return false | |
| } | |
| if mode == encodeHost || mode == encodeZone { | |
| // §3.2.2 Host allows | |
| // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" | |
| // as part of reg-name. | |
| // We add : because we include :port as part of host. | |
| // We add [ ] because we include [ipv6]:port as part of host. | |
| // We add < > because they're the only characters left that | |
| // we could possibly allow, and Parse will reject them if we | |
| // escape them (because hosts can't use %-encoding for | |
| // ASCII bytes). | |
| switch c { | |
| case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '[', ']', '<', '>', '"': | |
| return false | |
| } | |
| } | |
| switch c { | |
| case '-', '_', '.', '~': // §2.3 Unreserved characters (mark) | |
| return false | |
| case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved) | |
| // Different sections of the URL allow a few of | |
| // the reserved characters to appear unescaped. | |
| switch mode { | |
| case encodePath: // §3.3 | |
| // The RFC allows : @ & = + $ but saves / ; , for assigning | |
| // meaning to individual path segments. This package | |
| // only manipulates the path as a whole, so we allow those | |
| // last three as well. That leaves only ? to escape. | |
| return c == '?' | |
| case encodePathSegment: // §3.3 | |
| // The RFC allows : @ & = + $ but saves / ; , for assigning | |
| // meaning to individual path segments. | |
| return c == '/' || c == ';' || c == ',' || c == '?' | |
| case encodeUserPassword: // §3.2.1 | |
| // The RFC allows ';', ':', '&', '=', '+', '$', and ',' in | |
| // userinfo, so we must escape only '@', '/', and '?'. | |
| // The parsing of userinfo treats ':' as special so we must escape | |
| // that too. | |
| return c == '@' || c == '/' || c == '?' || c == ':' | |
| case encodeQueryComponent: // §3.4 | |
| // The RFC reserves (so we must escape) everything. | |
| return true | |
| case encodeFragment: // §4.1 | |
| // The RFC text is silent but the grammar allows | |
| // everything, so escape nothing. | |
| return false | |
| } | |
| } | |
| if mode == encodeFragment { | |
| // RFC 3986 §2.2 allows not escaping sub-delims. A subset of sub-delims are | |
| // included in reserved from RFC 2396 §2.2. The remaining sub-delims do not | |
| // need to be escaped. To minimize potential breakage, we apply two restrictions: | |
| // (1) we always escape sub-delims outside of the fragment, and (2) we always | |
| // escape single quote to avoid breaking callers that had previously assumed that | |
| // single quotes would be escaped. See issue #19917. | |
| switch c { | |
| case '!', '(', ')', '*': | |
| return false | |
| } | |
| } | |
| // Everything else must be escaped. | |
| return true | |
| } | |
// ishex reports whether c is an ASCII hexadecimal digit: 0-9, a-f or A-F.
func ishex(c byte) bool {
	switch {
	case '0' <= c && c <= '9':
		return true
	case 'a' <= c && c <= 'f':
		return true
	case 'A' <= c && c <= 'F':
		return true
	}
	return false
}