| // Copyright 2010 The Go Authors. All rights reserved. | |
| // Use of this source code is governed by a BSD-style | |
| // license that can be found in the LICENSE file. | |
| package textproto | |
| import ( | |
| "bufio" | |
| "bytes" | |
| "errors" | |
| "fmt" | |
| "io" | |
| "math" | |
| "strconv" | |
| "strings" | |
| "sync" | |
| _ "unsafe" // for linkname | |
| ) | |
| // TODO: This should be a distinguishable error (ErrMessageTooLarge) | |
| // to allow mime/multipart to detect it. | |
| var errMessageTooLarge = errors.New("message too large") | |
| // A Reader implements convenience methods for reading requests | |
| // or responses from a text protocol network connection. | |
| type Reader struct { | |
| R *bufio.Reader | |
| dot *dotReader | |
| buf []byte // a re-usable buffer for readContinuedLineSlice | |
| } | |
| // NewReader returns a new [Reader] reading from r. | |
| // | |
| // To avoid denial of service attacks, the provided [bufio.Reader] | |
| // should be reading from an [io.LimitReader] or similar Reader to bound | |
| // the size of responses. | |
| func NewReader(r *bufio.Reader) *Reader { | |
| return &Reader{R: r} | |
| } | |
| // ReadLine reads a single line from r, | |
| // eliding the final \n or \r\n from the returned string. | |
| func (r *Reader) ReadLine() (string, error) { | |
| line, err := r.readLineSlice(-1) | |
| return string(line), err | |
| } | |
| // ReadLineBytes is like [Reader.ReadLine] but returns a []byte instead of a string. | |
| func (r *Reader) ReadLineBytes() ([]byte, error) { | |
| line, err := r.readLineSlice(-1) | |
| if line != nil { | |
| line = bytes.Clone(line) | |
| } | |
| return line, err | |
| } | |
| // readLineSlice reads a single line from r, | |
| // up to lim bytes long (or unlimited if lim is less than 0), | |
| // eliding the final \r or \r\n from the returned string. | |
| func (r *Reader) readLineSlice(lim int64) ([]byte, error) { | |
| r.closeDot() | |
| var line []byte | |
| for { | |
| l, more, err := r.R.ReadLine() | |
| if err != nil { | |
| return nil, err | |
| } | |
| if lim >= 0 && int64(len(line))+int64(len(l)) > lim { | |
| return nil, errMessageTooLarge | |
| } | |
| // Avoid the copy if the first call produced a full line. | |
| if line == nil && !more { | |
| return l, nil | |
| } | |
| line = append(line, l...) | |
| if !more { | |
| break | |
| } | |
| } | |
| return line, nil | |
| } | |
| // ReadContinuedLine reads a possibly continued line from r, | |
| // eliding the final trailing ASCII white space. | |
| // Lines after the first are considered continuations if they | |
| // begin with a space or tab character. In the returned data, | |
| // continuation lines are separated from the previous line | |
| // only by a single space: the newline and leading white space | |
| // are removed. | |
| // | |
| // For example, consider this input: | |
| // | |
| // Line 1 | |
| // continued... | |
| // Line 2 | |
| // | |
| // The first call to ReadContinuedLine will return "Line 1 continued..." | |
| // and the second will return "Line 2". | |
| // | |
| // Empty lines are never continued. | |
| func (r *Reader) ReadContinuedLine() (string, error) { | |
| line, err := r.readContinuedLineSlice(-1, noValidation) | |
| return string(line), err | |
| } | |
| // trim returns s with leading and trailing spaces and tabs removed. | |
| // It does not assume Unicode or UTF-8. | |
| func trim(s []byte) []byte { | |
| i := 0 | |
| for i < len(s) && (s[i] == ' ' || s[i] == '\t') { | |
| i++ | |
| } | |
| s = s[i:] | |
| n := len(s) - 1 | |
| for n >= 0 && (s[n] == ' ' || s[n] == '\t') { | |
| n-- | |
| } | |
| return s[:n+1] | |
| } | |
| // ReadContinuedLineBytes is like [Reader.ReadContinuedLine] but | |
| // returns a []byte instead of a string. | |
| func (r *Reader) ReadContinuedLineBytes() ([]byte, error) { | |
| line, err := r.readContinuedLineSlice(-1, noValidation) | |
| if line != nil { | |
| line = bytes.Clone(line) | |
| } | |
| return line, err | |
| } | |
| // readContinuedLineSlice reads continued lines from the reader buffer, | |
| // returning a byte slice with all lines. The validateFirstLine function | |
| // is run on the first read line, and if it returns an error then this | |
| // error is returned from readContinuedLineSlice. | |
| // It reads up to lim bytes of data (or unlimited if lim is less than 0). | |
| func (r *Reader) readContinuedLineSlice(lim int64, validateFirstLine func([]byte) error) ([]byte, error) { | |
| if validateFirstLine == nil { | |
| return nil, fmt.Errorf("missing validateFirstLine func") | |
| } | |
| // Read the first line. | |
| line, err := r.readLineSlice(lim) | |
| if err != nil { | |
| return nil, err | |
| } | |
| if len(line) == 0 { // blank line - no continuation | |
| return line, nil | |
| } | |
| if err := validateFirstLine(line); err != nil { | |
| return nil, err | |
| } | |
| // Optimistically assume that we have started to buffer the next line | |
| // and it starts with an ASCII letter (the next header key), or a blank | |
| // line, so we can avoid copying that buffered data around in memory | |
| // and skipping over non-existent whitespace. | |
| if r.R.Buffered() > 1 { | |
| peek, _ := r.R.Peek(2) | |
| if len(peek) > 0 && (isASCIILetter(peek[0]) || peek[0] == '\n') || | |
| len(peek) == 2 && peek[0] == '\r' && peek[1] == '\n' { | |
| return trim(line), nil | |
| } | |
| } | |
| // ReadByte or the next readLineSlice will flush the read buffer; | |
| // copy the slice into buf. | |
| r.buf = append(r.buf[:0], trim(line)...) | |
| if lim < 0 { | |
| lim = math.MaxInt64 | |
| } | |
| lim -= int64(len(r.buf)) | |
| // Read continuation lines. | |
| for r.skipSpace() > 0 { | |
| r.buf = append(r.buf, ' ') | |
| if int64(len(r.buf)) >= lim { | |
| return nil, errMessageTooLarge | |
| } | |
| line, err := r.readLineSlice(lim - int64(len(r.buf))) | |
| if err != nil { | |
| break | |
| } | |
| r.buf = append(r.buf, trim(line)...) | |
| } | |
| return r.buf, nil | |
| } | |
| // skipSpace skips R over all spaces and returns the number of bytes skipped. | |
| func (r *Reader) skipSpace() int { | |
| n := 0 | |
| for { | |
| c, err := r.R.ReadByte() | |
| if err != nil { | |
| // Bufio will keep err until next read. | |
| break | |
| } | |
| if c != ' ' && c != '\t' { | |
| r.R.UnreadByte() | |
| break | |
| } | |
| n++ | |
| } | |
| return n | |
| } | |
| func (r *Reader) readCodeLine(expectCode int) (code int, continued bool, message string, err error) { | |
| line, err := r.ReadLine() | |
| if err != nil { | |
| return | |
| } | |
| return parseCodeLine(line, expectCode) | |
| } | |
| func parseCodeLine(line string, expectCode int) (code int, continued bool, message string, err error) { | |
| if len(line) < 4 || line[3] != ' ' && line[3] != '-' { | |
| err = ProtocolError("short response: " + line) | |
| return | |
| } | |
| continued = line[3] == '-' | |
| code, err = strconv.Atoi(line[0:3]) | |
| if err != nil || code < 100 { | |
| err = ProtocolError("invalid response code: " + line) | |
| return | |
| } | |
| message = line[4:] | |
| if 1 <= expectCode && expectCode < 10 && code/100 != expectCode || | |
| 10 <= expectCode && expectCode < 100 && code/10 != expectCode || | |
| 100 <= expectCode && expectCode < 1000 && code != expectCode { | |
| err = &Error{code, message} | |
| } | |
| return | |
| } | |
| // ReadCodeLine reads a response code line of the form | |
| // | |
| // code message | |
| // | |
| // where code is a three-digit status code and the message | |
| // extends to the rest of the line. An example of such a line is: | |
| // | |
| // 220 plan9.bell-labs.com ESMTP | |
| // | |
| // If the prefix of the status does not match the digits in expectCode, | |
| // ReadCodeLine returns with err set to &Error{code, message}. | |
| // For example, if expectCode is 31, an error will be returned if | |
| // the status is not in the range [310,319]. | |
| // | |
| // If the response is multi-line, ReadCodeLine returns an error. | |
| // | |
| // An expectCode <= 0 disables the check of the status code. | |
| func (r *Reader) ReadCodeLine(expectCode int) (code int, message string, err error) { | |
| code, continued, message, err := r.readCodeLine(expectCode) | |
| if err == nil && continued { | |
| err = ProtocolError("unexpected multi-line response: " + message) | |
| } | |
| return | |
| } | |
| // ReadResponse reads a multi-line response of the form: | |
| // | |
| // code-message line 1 | |
| // code-message line 2 | |
| // ... | |
| // code message line n | |
| // | |
| // where code is a three-digit status code. The first line starts with the | |
| // code and a hyphen. The response is terminated by a line that starts | |
| // with the same code followed by a space. Each line in message is | |
| // separated by a newline (\n). | |
| // | |
| // See page 36 of RFC 959 (https://www.ietf.org/rfc/rfc959.txt) for | |
| // details of another form of response accepted: | |
| // | |
| // code-message line 1 | |
| // message line 2 | |
| // ... | |
| // code message line n | |
| // | |
| // If the prefix of the status does not match the digits in expectCode, | |
| // ReadResponse returns with err set to &Error{code, message}. | |
| // For example, if expectCode is 31, an error will be returned if | |
| // the status is not in the range [310,319]. | |
| // | |
| // An expectCode <= 0 disables the check of the status code. | |
| func (r *Reader) ReadResponse(expectCode int) (code int, message string, err error) { | |
| code, continued, first, err := r.readCodeLine(expectCode) | |
| multi := continued | |
| var messageBuilder strings.Builder | |
| messageBuilder.WriteString(first) | |
| for continued { | |
| line, err := r.ReadLine() | |
| if err != nil { | |
| return 0, "", err | |
| } | |
| var code2 int | |
| var moreMessage string | |
| code2, continued, moreMessage, err = parseCodeLine(line, 0) | |
| if err != nil || code2 != code { | |
| messageBuilder.WriteByte('\n') | |
| messageBuilder.WriteString(strings.TrimRight(line, "\r\n")) | |
| continued = true | |
| continue | |
| } | |
| messageBuilder.WriteByte('\n') | |
| messageBuilder.WriteString(moreMessage) | |
| } | |
| message = messageBuilder.String() | |
| if err != nil && multi && message != "" { | |
| // replace one line error message with all lines (full message) | |
| err = &Error{code, message} | |
| } | |
| return | |
| } | |
| // DotReader returns a new [Reader] that satisfies Reads using the | |
| // decoded text of a dot-encoded block read from r. | |
| // The returned Reader is only valid until the next call | |
| // to a method on r. | |
| // | |
| // Dot encoding is a common framing used for data blocks | |
| // in text protocols such as SMTP. The data consists of a sequence | |
| // of lines, each of which ends in "\r\n". The sequence itself | |
| // ends at a line containing just a dot: ".\r\n". Lines beginning | |
| // with a dot are escaped with an additional dot to avoid | |
| // looking like the end of the sequence. | |
| // | |
| // The decoded form returned by the Reader's Read method | |
| // rewrites the "\r\n" line endings into the simpler "\n", | |
| // removes leading dot escapes if present, and stops with error [io.EOF] | |
| // after consuming (and discarding) the end-of-sequence line. | |
| func (r *Reader) DotReader() io.Reader { | |
| r.closeDot() | |
| r.dot = &dotReader{r: r} | |
| return r.dot | |
| } | |
| type dotReader struct { | |
| r *Reader | |
| state int | |
| } | |
| // Read satisfies reads by decoding dot-encoded data read from d.r. | |
| func (d *dotReader) Read(b []byte) (n int, err error) { | |
| // Run data through a simple state machine to | |
| // elide leading dots, rewrite trailing \r\n into \n, | |
| // and detect ending .\r\n line. | |
| const ( | |
| stateBeginLine = iota // beginning of line; initial state; must be zero | |
| stateDot // read . at beginning of line | |
| stateDotCR // read .\r at beginning of line | |
| stateCR // read \r (possibly at end of line) | |
| stateData // reading data in middle of line | |
| stateEOF // reached .\r\n end marker line | |
| ) | |
| br := d.r.R | |
| for n < len(b) && d.state != stateEOF { | |
| var c byte | |
| c, err = br.ReadByte() | |
| if err != nil { | |
| if err == io.EOF { | |
| err = io.ErrUnexpectedEOF | |
| } | |
| break | |
| } | |
| switch d.state { | |
| case stateBeginLine: | |
| if c == '.' { | |
| d.state = stateDot | |
| continue | |
| } | |
| if c == '\r' { | |
| d.state = stateCR | |
| continue | |
| } | |
| d.state = stateData | |
| case stateDot: | |
| if c == '\r' { | |
| d.state = stateDotCR | |
| continue | |
| } | |
| if c == '\n' { | |
| d.state = stateEOF | |
| continue | |
| } | |
| d.state = stateData | |
| case stateDotCR: | |
| if c == '\n' { | |
| d.state = stateEOF | |
| continue | |
| } | |
| // Not part of .\r\n. | |
| // Consume leading dot and emit saved \r. | |
| br.UnreadByte() | |
| c = '\r' | |
| d.state = stateData | |
| case stateCR: | |
| if c == '\n' { | |
| d.state = stateBeginLine | |
| break | |
| } | |
| // Not part of \r\n. Emit saved \r | |
| br.UnreadByte() | |
| c = '\r' | |
| d.state = stateData | |
| case stateData: | |
| if c == '\r' { | |
| d.state = stateCR | |
| continue | |
| } | |
| if c == '\n' { | |
| d.state = stateBeginLine | |
| } | |
| } | |
| b[n] = c | |
| n++ | |
| } | |
| if err == nil && d.state == stateEOF { | |
| err = io.EOF | |
| } | |
| if err != nil && d.r.dot == d { | |
| d.r.dot = nil | |
| } | |
| return | |
| } | |
| // closeDot drains the current DotReader if any, | |
| // making sure that it reads until the ending dot line. | |
| func (r *Reader) closeDot() { | |
| if r.dot == nil { | |
| return | |
| } | |
| buf := make([]byte, 128) | |
| for r.dot != nil { | |
| // When Read reaches EOF or an error, | |
| // it will set r.dot == nil. | |
| r.dot.Read(buf) | |
| } | |
| } | |
| // ReadDotBytes reads a dot-encoding and returns the decoded data. | |
| // | |
| // See the documentation for the [Reader.DotReader] method for details about dot-encoding. | |
| func (r *Reader) ReadDotBytes() ([]byte, error) { | |
| return io.ReadAll(r.DotReader()) | |
| } | |
| // ReadDotLines reads a dot-encoding and returns a slice | |
| // containing the decoded lines, with the final \r\n or \n elided from each. | |
| // | |
| // See the documentation for the [Reader.DotReader] method for details about dot-encoding. | |
| func (r *Reader) ReadDotLines() ([]string, error) { | |
| // We could use ReadDotBytes and then Split it, | |
| // but reading a line at a time avoids needing a | |
| // large contiguous block of memory and is simpler. | |
| var v []string | |
| var err error | |
| for { | |
| var line string | |
| line, err = r.ReadLine() | |
| if err != nil { | |
| if err == io.EOF { | |
| err = io.ErrUnexpectedEOF | |
| } | |
| break | |
| } | |
| // Dot by itself marks end; otherwise cut one dot. | |
| if len(line) > 0 && line[0] == '.' { | |
| if len(line) == 1 { | |
| break | |
| } | |
| line = line[1:] | |
| } | |
| v = append(v, line) | |
| } | |
| return v, err | |
| } | |
| var colon = []byte(":") | |
| // ReadMIMEHeader reads a MIME-style header from r. | |
| // The header is a sequence of possibly continued Key: Value lines | |
| // ending in a blank line. | |
| // The returned map m maps [CanonicalMIMEHeaderKey](key) to a | |
| // sequence of values in the same order encountered in the input. | |
| // | |
| // For example, consider this input: | |
| // | |
| // My-Key: Value 1 | |
| // Long-Key: Even | |
| // Longer Value | |
| // My-Key: Value 2 | |
| // | |
| // Given that input, ReadMIMEHeader returns the map: | |
| // | |
| // map[string][]string{ | |
| // "My-Key": {"Value 1", "Value 2"}, | |
| // "Long-Key": {"Even Longer Value"}, | |
| // } | |
| func (r *Reader) ReadMIMEHeader() (MIMEHeader, error) { | |
| return readMIMEHeader(r, math.MaxInt64, math.MaxInt64) | |
| } | |
| // readMIMEHeader is accessed from mime/multipart. | |
| //go:linkname readMIMEHeader | |
| // readMIMEHeader is a version of ReadMIMEHeader which takes a limit on the header size. | |
| // It is called by the mime/multipart package. | |
| func readMIMEHeader(r *Reader, maxMemory, maxHeaders int64) (MIMEHeader, error) { | |
| // Avoid lots of small slice allocations later by allocating one | |
| // large one ahead of time which we'll cut up into smaller | |
| // slices. If this isn't big enough later, we allocate small ones. | |
| var strs []string | |
| hint := r.upcomingHeaderKeys() | |
| if hint > 0 { | |
| if hint > 1000 { | |
| hint = 1000 // set a cap to avoid overallocation | |
| } | |
| strs = make([]string, hint) | |
| } | |
| m := make(MIMEHeader, hint) | |
| // Account for 400 bytes of overhead for the MIMEHeader, plus 200 bytes per entry. | |
| // Benchmarking map creation as of go1.20, a one-entry MIMEHeader is 416 bytes and large | |
| // MIMEHeaders average about 200 bytes per entry. | |
| maxMemory -= 400 | |
| const mapEntryOverhead = 200 | |
| // The first line cannot start with a leading space. | |
| if buf, err := r.R.Peek(1); err == nil && (buf[0] == ' ' || buf[0] == '\t') { | |
| const errorLimit = 80 // arbitrary limit on how much of the line we'll quote | |
| line, err := r.readLineSlice(errorLimit) | |
| if err != nil { | |
| return m, err | |
| } | |
| return m, ProtocolError("malformed MIME header initial line: " + string(line)) | |
| } | |
| for { | |
| kv, err := r.readContinuedLineSlice(maxMemory, mustHaveFieldNameColon) | |
| if len(kv) == 0 { | |
| return m, err | |
| } | |
| // Key ends at first colon. | |
| k, v, ok := bytes.Cut(kv, colon) | |
| if !ok { | |
| return m, ProtocolError("malformed MIME header line: " + string(kv)) | |
| } | |
| key, ok := canonicalMIMEHeaderKey(k) | |
| if !ok { | |
| return m, ProtocolError("malformed MIME header line: " + string(kv)) | |
| } | |
| for _, c := range v { | |
| if !validHeaderValueByte(c) { | |
| return m, ProtocolError("malformed MIME header line: " + string(kv)) | |
| } | |
| } | |
| maxHeaders-- | |
| if maxHeaders < 0 { | |
| return nil, errMessageTooLarge | |
| } | |
| // Skip initial spaces in value. | |
| value := string(bytes.TrimLeft(v, " \t")) | |
| vv := m[key] | |
| if vv == nil { | |
| maxMemory -= int64(len(key)) | |
| maxMemory -= mapEntryOverhead | |
| } | |
| maxMemory -= int64(len(value)) | |
| if maxMemory < 0 { | |
| return m, errMessageTooLarge | |
| } | |
| if vv == nil && len(strs) > 0 { | |
| // More than likely this will be a single-element key. | |
| // Most headers aren't multi-valued. | |
| // Set the capacity on strs[0] to 1, so any future append | |
| // won't extend the slice into the other strings. | |
| vv, strs = strs[:1:1], strs[1:] | |
| vv[0] = value | |
| m[key] = vv | |
| } else { | |
| m[key] = append(vv, value) | |
| } | |
| if err != nil { | |
| return m, err | |
| } | |
| } | |
| } | |
| // noValidation is a no-op validation func for readContinuedLineSlice | |
| // that permits any lines. | |
| func noValidation(_ []byte) error { return nil } | |
| // mustHaveFieldNameColon ensures that, per RFC 7230, the | |
| // field-name is on a single line, so the first line must | |
| // contain a colon. | |
| func mustHaveFieldNameColon(line []byte) error { | |
| if bytes.IndexByte(line, ':') < 0 { | |
| return ProtocolError(fmt.Sprintf("malformed MIME header: missing colon: %q", line)) | |
| } | |
| return nil | |
| } | |
| var nl = []byte("\n") | |
| // upcomingHeaderKeys returns an approximation of the number of keys | |
| // that will be in this header. If it gets confused, it returns 0. | |
| func (r *Reader) upcomingHeaderKeys() (n int) { | |
| // Try to determine the 'hint' size. | |
| r.R.Peek(1) // force a buffer load if empty | |
| s := r.R.Buffered() | |
| if s == 0 { | |
| return | |
| } | |
| peek, _ := r.R.Peek(s) | |
| for len(peek) > 0 && n < 1000 { | |
| var line []byte | |
| line, peek, _ = bytes.Cut(peek, nl) | |
| if len(line) == 0 || (len(line) == 1 && line[0] == '\r') { | |
| // Blank line separating headers from the body. | |
| break | |
| } | |
| if line[0] == ' ' || line[0] == '\t' { | |
| // Folded continuation of the previous line. | |
| continue | |
| } | |
| n++ | |
| } | |
| return n | |
| } | |
| // CanonicalMIMEHeaderKey returns the canonical format of the | |
| // MIME header key s. The canonicalization converts the first | |
| // letter and any letter following a hyphen to upper case; | |
| // the rest are converted to lowercase. For example, the | |
| // canonical key for "accept-encoding" is "Accept-Encoding". | |
| // MIME header keys are assumed to be ASCII only. | |
| // If s contains a space or invalid header field bytes as | |
| // defined by RFC 9112, it is returned without modifications. | |
| func CanonicalMIMEHeaderKey(s string) string { | |
| // Quick check for canonical encoding. | |
| upper := true | |
| for i := 0; i < len(s); i++ { | |
| c := s[i] | |
| if !validHeaderFieldByte(c) { | |
| return s | |
| } | |
| if upper && 'a' <= c && c <= 'z' { | |
| s, _ = canonicalMIMEHeaderKey([]byte(s)) | |
| return s | |
| } | |
| if !upper && 'A' <= c && c <= 'Z' { | |
| s, _ = canonicalMIMEHeaderKey([]byte(s)) | |
| return s | |
| } | |
| upper = c == '-' | |
| } | |
| return s | |
| } | |
| const toLower = 'a' - 'A' | |
| // validHeaderFieldByte reports whether c is a valid byte in a header | |
| // field name. RFC 7230 says: | |
| // | |
| // header-field = field-name ":" OWS field-value OWS | |
| // field-name = token | |
| // tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / | |
| // "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA | |
| // token = 1*tchar | |
| func validHeaderFieldByte(c byte) bool { | |
| // mask is a 128-bit bitmap with 1s for allowed bytes, | |
| // so that the byte c can be tested with a shift and an and. | |
| // If c >= 128, then 1<<c and 1<<(c-64) will both be zero, | |
| // and this function will return false. | |
| const mask = 0 | | |
| (1<<(10)-1)<<'0' | | |
| (1<<(26)-1)<<'a' | | |
| (1<<(26)-1)<<'A' | | |
| 1<<'!' | | |
| 1<<'#' | | |
| 1<<'$' | | |
| 1<<'%' | | |
| 1<<'&' | | |
| 1<<'\'' | | |
| 1<<'*' | | |
| 1<<'+' | | |
| 1<<'-' | | |
| 1<<'.' | | |
| 1<<'^' | | |
| 1<<'_' | | |
| 1<<'`' | | |
| 1<<'|' | | |
| 1<<'~' | |
| return ((uint64(1)<<c)&(mask&(1<<64-1)) | | |
| (uint64(1)<<(c-64))&(mask>>64)) != 0 | |
| } | |
| // validHeaderValueByte reports whether c is a valid byte in a header | |
| // field value. RFC 7230 says: | |
| // | |
| // field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ] | |
| // field-vchar = VCHAR / obs-text | |
| // obs-text = %x80-FF | |
| // | |
| // RFC 5234 says: | |
| // | |
| // HTAB = %x09 | |
| // SP = %x20 | |
| // VCHAR = %x21-7E | |
| func validHeaderValueByte(c byte) bool { | |
| // mask is a 128-bit bitmap with 1s for allowed bytes, | |
| // so that the byte c can be tested with a shift and an and. | |
| // If c >= 128, then 1<<c and 1<<(c-64) will both be zero. | |
| // Since this is the obs-text range, we invert the mask to | |
| // create a bitmap with 1s for disallowed bytes. | |
| const mask = 0 | | |
| (1<<(0x7f-0x21)-1)<<0x21 | // VCHAR: %x21-7E | |
| 1<<0x20 | // SP: %x20 | |
| 1<<0x09 // HTAB: %x09 | |
| return ((uint64(1)<<c)&^(mask&(1<<64-1)) | | |
| (uint64(1)<<(c-64))&^(mask>>64)) == 0 | |
| } | |
| // canonicalMIMEHeaderKey is like CanonicalMIMEHeaderKey but is | |
| // allowed to mutate the provided byte slice before returning the | |
| // string. | |
| // | |
| // For invalid inputs (if a contains spaces or non-token bytes), a | |
| // is unchanged and a string copy is returned. | |
| // | |
| // ok is true if the header key contains only valid characters and spaces. | |
| // ReadMIMEHeader accepts header keys containing spaces, but does not | |
| // canonicalize them. | |
| func canonicalMIMEHeaderKey(a []byte) (_ string, ok bool) { | |
| if len(a) == 0 { | |
| return "", false | |
| } | |
| // See if a looks like a header key. If not, return it unchanged. | |
| noCanon := false | |
| for _, c := range a { | |
| if validHeaderFieldByte(c) { | |
| continue | |
| } | |
| // Don't canonicalize. | |
| if c == ' ' { | |
| // We accept invalid headers with a space before the | |
| // colon, but must not canonicalize them. | |
| // See https://go.dev/issue/34540. | |
| noCanon = true | |
| continue | |
| } | |
| return string(a), false | |
| } | |
| if noCanon { | |
| return string(a), true | |
| } | |
| upper := true | |
| for i, c := range a { | |
| // Canonicalize: first letter upper case | |
| // and upper case after each dash. | |
| // (Host, User-Agent, If-Modified-Since). | |
| // MIME headers are ASCII only, so no Unicode issues. | |
| if upper && 'a' <= c && c <= 'z' { | |
| c -= toLower | |
| } else if !upper && 'A' <= c && c <= 'Z' { | |
| c += toLower | |
| } | |
| a[i] = c | |
| upper = c == '-' // for next time | |
| } | |
| commonHeaderOnce.Do(initCommonHeader) | |
| // The compiler recognizes m[string(byteSlice)] as a special | |
| // case, so a copy of a's bytes into a new string does not | |
| // happen in this map lookup: | |
| if v := commonHeader[string(a)]; v != "" { | |
| return v, true | |
| } | |
| return string(a), true | |
| } | |
| // commonHeader interns common header strings. | |
| var commonHeader map[string]string | |
| var commonHeaderOnce sync.Once | |
| func initCommonHeader() { | |
| commonHeader = make(map[string]string) | |
| for _, v := range []string{ | |
| "Accept", | |
| "Accept-Charset", | |
| "Accept-Encoding", | |
| "Accept-Language", | |
| "Accept-Ranges", | |
| "Cache-Control", | |
| "Cc", | |
| "Connection", | |
| "Content-Id", | |
| "Content-Language", | |
| "Content-Length", | |
| "Content-Transfer-Encoding", | |
| "Content-Type", | |
| "Cookie", | |
| "Date", | |
| "Dkim-Signature", | |
| "Etag", | |
| "Expires", | |
| "From", | |
| "Host", | |
| "If-Modified-Since", | |
| "If-None-Match", | |
| "In-Reply-To", | |
| "Last-Modified", | |
| "Location", | |
| "Message-Id", | |
| "Mime-Version", | |
| "Pragma", | |
| "Received", | |
| "Return-Path", | |
| "Server", | |
| "Set-Cookie", | |
| "Subject", | |
| "To", | |
| "User-Agent", | |
| "Via", | |
| "X-Forwarded-For", | |
| "X-Imforwards", | |
| "X-Powered-By", | |
| } { | |
| commonHeader[v] = v | |
| } | |
| } | |