Upload folder using huggingface_hub

e36aeda verified 3 months ago

13.4 kB

	// Copyright 2025 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package cgroup

	import (
	"internal/bytealg"
	"internal/strconv"
	)

	var (
	// ErrNoCgroup is returned by the parsers in this file when no
	// matching CPU cgroup or cgroup mount is found.
	ErrNoCgroup error = stringError("not in a cgroup")

	// errMalformedFile is returned when the contents being parsed do not
	// have the expected format.
	errMalformedFile error = stringError("malformed file")
	)

	// _PATH_MAX is the maximum length of a path, as assumed by the parsers in
	// this file.
	const _PATH_MAX = 4096

	// Buffer sizes, listed from the smallest building block to the total.
	const (
		// Required space to store a path of the cgroup in the filesystem.
		PathSize = _PATH_MAX

		// /proc/self/mountinfo path escape sequences are 4 characters long,
		// so a path consisting entirely of escaped characters could be 4
		// times larger than its unescaped form.
		escapedPathMax = 4 * _PATH_MAX

		// Required space to parse /proc/self/mountinfo and
		// /proc/self/cgroup. See findCPUMount and findCPURelativePath.
		ParseSize = 4 * escapedPathMax

		// Required amount of scratch space for CPULimit: one path plus the
		// parse buffer.
		//
		// TODO(prattmic): This is shockingly large (~70KiB) due to the (very
		// unlikely) combination of extremely long paths consisting mostly of
		// escaped characters. The scratch buffer ends up in .bss in package
		// runtime, so it doesn't contribute to binary size and generally
		// won't be faulted in, but it would still be nice to shrink this. A
		// more complex parser that did not need to keep entire lines in
		// memory could get away with much less. Alternatively, we could do a
		// one-off mmap allocation for this buffer, which is only mapped
		// larger if we actually need the extra space.
		ScratchSize = PathSize + ParseSize
	)

	// Version identifies which cgroup hierarchy version is in use.
	type Version int

	// The numeric values of V1 and V2 equal the cgroup version they name.
	// VersionUnknown is the zero value.
	const (
		VersionUnknown Version = 0
		V1             Version = 1
		V2             Version = 2
	)

	// parseV1Number parses a base-10 signed integer that is terminated by a
	// newline. Anything after the first newline is ignored.
	//
	// Returns errMalformedFile if buf contains no newline or if the text
	// before the newline is not a valid 64-bit integer.
	func parseV1Number(buf []byte) (int64, error) {
		// The number ends at the first newline, which must be present.
		end := bytealg.IndexByte(buf, '\n')
		if end < 0 {
			return 0, errMalformedFile
		}

		n, err := strconv.ParseInt(string(buf[:end]), 10, 64)
		if err != nil {
			return 0, errMalformedFile
		}
		return n, nil
	}

	// parseV2Limit parses a CPU limit of the form
	//
	//	quota period\n
	//
	// where quota is either a base-10 integer or the literal "max", and
	// period is a base-10 integer (presumably the contents of the cgroup v2
	// cpu.max file; confirm against the caller).
	//
	// Returns the limit as quota/period and true if a limit is set. Returns
	// false with a nil error if quota is "max", i.e., there is no limit.
	//
	// Returns errMalformedFile if the separating space or the terminating
	// newline is missing, if either number fails to parse, or if period is
	// not positive.
	func parseV2Limit(buf []byte) (float64, bool, error) {
		sp := bytealg.IndexByte(buf, ' ')
		if sp < 0 {
			return 0, false, errMalformedFile
		}

		quotaStr := buf[:sp]
		// Neither cmd/compile nor gccgo allocates for this string conversion.
		if string(quotaStr) == "max" {
			// No limit. The period is not inspected in this case.
			return 0, false, nil
		}

		// The period runs up to the terminating newline, which is required.
		periodStr := buf[sp+1:]
		nl := bytealg.IndexByte(periodStr, '\n')
		if nl < 0 {
			return 0, false, errMalformedFile
		}
		periodStr = periodStr[:nl]

		quota, err := strconv.ParseInt(string(quotaStr), 10, 64)
		if err != nil {
			return 0, false, errMalformedFile
		}

		period, err := strconv.ParseInt(string(periodStr), 10, 64)
		if err != nil {
			return 0, false, errMalformedFile
		}
		if period <= 0 {
			// A zero period would make the division below produce +Inf or
			// NaN, and a negative period is meaningless. Reject both rather
			// than hand a bogus limit to the caller.
			return 0, false, errMalformedFile
		}

		return float64(quota) / float64(period), true, nil
	}

	// Finds the path of the current process's CPU cgroup and writes it to out.
	//
	// fd is a file descriptor for /proc/self/cgroup.
	// Returns the number of bytes written and the cgroup version (1 or 2).
	func parseCPUCgroup(fd int, read func(fd int, b []byte) (int, uintptr), out []byte, scratch []byte) (int, Version, error) {
	// The format of each line is
	//
	// hierarchy-ID:controller-list:cgroup-path
	//
	// controller-list is comma-separated.
	//
	// cgroup v2 has hierarchy-ID 0. If a v1 hierarchy contains "cpu", that
	// is the CPU controller. Otherwise the v2 hierarchy (if any) is the
	// CPU controller. It is not possible to mount the same controller
	// simultaneously under both the v1 and the v2 hierarchies.
	//
	// See man 7 cgroups for more details.
	//
	// hierarchy-ID and controller-list have relatively small maximum
	// sizes, and the path can be up to _PATH_MAX, so we need a bit more
	// than 1 _PATH_MAX of scratch space.

	l := newLineReader(fd, scratch, read)

	// Bytes written to out.
	n := 0

	for {
	err := l.next()
	if err == errIncompleteLine {
	// Don't allow incomplete lines. While in theory the
	// incomplete line may be for a controller we don't
	// care about, in practice all lines should be of
	// similar length, so we should just have a buffer big
	// enough for any.
	return 0, 0, err
	} else if err == errEOF {
	break
	} else if err != nil {
	return 0, 0, err
	}

	line := l.line()

	// The format of each line is
	//
	// hierarchy-ID:controller-list:cgroup-path
	//
	// controller-list is comma-separated.
	// See man 7 cgroups for more details.
	i := bytealg.IndexByte(line, ':')
	if i < 0 {
	return 0, 0, errMalformedFile
	}

	hierarchy := line[:i]
	line = line[i+1:]

	i = bytealg.IndexByte(line, ':')
	if i < 0 {
	return 0, 0, errMalformedFile
	}

	controllers := line[:i]
	line = line[i+1:]

	path := line
	if len(path) == 0 \|\| path[0] != '/' {
	// We rely on this when composing the full path.
	return 0, 0, errMalformedFile
	}
	if len(path) > len(out) {
	// Should not be possible. If we really get a very long cgroup path,
	// read /proc/self/cgroup will fail with ENAMETOOLONG.
	return 0, 0, errPathTooLong
	}

	if string(hierarchy) == "0" {
	// v2 hierarchy.
	n = copy(out, path)
	// Keep searching, we might find a v1 hierarchy with a
	// CPU controller, which takes precedence.
	} else {
	// v1 hierarchy
	if containsCPU(controllers) {
	// Found a v1 CPU controller. This must be the
	// only one, so we're done.
	return copy(out, path), V1, nil
	}
	}
	}

	if n == 0 {
	// Found nothing.
	return 0, 0, ErrNoCgroup
	}

	// Must be v2, v1 returns above.
	return n, V2, nil
	}

	// containsCPU reports whether one of the elements of the comma-separated
	// list b is exactly "cpu".
	func containsCPU(b []byte) bool {
		for rest := b; len(rest) > 0; {
			comma := bytealg.IndexByte(rest, ',')
			if comma < 0 {
				// Last element of the list.
				//
				// Neither cmd/compile nor gccgo allocates for these string conversions.
				return string(rest) == "cpu"
			}

			if string(rest[:comma]) == "cpu" {
				return true
			}

			// Move on to the element after the comma.
			rest = rest[comma+1:]
		}

		return false
	}

	// Returns the path to the specified cgroup and version with cpu controller
	//
	// fd is a file descriptor for /proc/self/mountinfo.
	// Returns the number of bytes written.
	func parseCPUMount(fd int, read func(fd int, b []byte) (int, uintptr), out, cgroup []byte, version Version, scratch []byte) (int, error) {
	// The format of each line is:
	//
	// 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
	// (1)(2)(3) (4) (5) (6) (7) (8) (9) (10) (11)
	//
	// (1) mount ID: unique identifier of the mount (may be reused after umount)
	// (2) parent ID: ID of parent (or of self for the top of the mount tree)
	// (3) major:minor: value of st_dev for files on filesystem
	// (4) root: root of the mount within the filesystem
	// (5) mount point: mount point relative to the process's root
	// (6) mount options: per mount options
	// (7) optional fields: zero or more fields of the form "tag[:value]"
	// (8) separator: marks the end of the optional fields
	// (9) filesystem type: name of filesystem of the form "type[.subtype]"
	// (10) mount source: filesystem specific information or "none"
	// (11) super options: per super block options
	//
	// See man 5 proc_pid_mountinfo for more details.
	//
	// Note that emitted paths will not contain space, tab, newline, or
	// carriage return. Those are escaped. See Linux show_mountinfo ->
	// show_path. We must unescape before returning.
	//
	// A mount point matches if the filesystem type (9) is cgroup2,
	// or cgroup with "cpu" in the super options (11),
	// and the cgroup is in the root (4). If there are multiple matches,
	// the first one is selected.
	//
	// We return full cgroup path, which is the mount point (5) +
	// cgroup parameter without the root (4) prefix.
	//
	// (4), (5), and (10) are up to _PATH_MAX. The remaining fields have a
	// small fixed maximum size, so 4*_PATH_MAX is plenty of scratch space.
	// Note that non-cgroup mounts may have arbitrarily long (11), but we
	// can skip those when parsing.

	l := newLineReader(fd, scratch, read)

	for {
	err := l.next()
	if err == errIncompleteLine {
	// An incomplete line is fine as long as it doesn't
	// impede parsing the fields we need. It shouldn't be
	// possible for any mount to use more than 3*PATH_MAX
	// before (9) because there are two paths and all other
	// earlier fields have bounded options. Only (11) has
	// unbounded options.
	} else if err == errEOF {
	break
	} else if err != nil {
	return 0, err
	}

	line := l.line()

	// Skip first three fields.
	for range 3 {
	i := bytealg.IndexByte(line, ' ')
	if i < 0 {
	return 0, errMalformedFile
	}
	line = line[i+1:]
	}

	// (4) root: root of the mount within the filesystem
	i := bytealg.IndexByte(line, ' ')
	if i < 0 {
	return 0, errMalformedFile
	}
	root := line[:i]
	if len(root) == 0 \|\| root[0] != '/' {
	// We rely on this in hasPathPrefix.
	return 0, errMalformedFile
	}
	line = line[i+1:]

	// (5) mount point: mount point relative to the process's root
	i = bytealg.IndexByte(line, ' ')
	if i < 0 {
	return 0, errMalformedFile
	}
	mnt := line[:i]
	line = line[i+1:]

	// Skip ahead past optional fields, delimited by " - ".
	for {
	i = bytealg.IndexByte(line, ' ')
	if i < 0 {
	return 0, errMalformedFile
	}
	if i+3 >= len(line) {
	return 0, errMalformedFile
	}
	delim := line[i : i+3]
	if string(delim) == " - " {
	line = line[i+3:]
	break
	}
	line = line[i+1:]
	}

	// (9) filesystem type: name of filesystem of the form "type[.subtype]"
	i = bytealg.IndexByte(line, ' ')
	if i < 0 {
	return 0, errMalformedFile
	}
	ftype := line[:i]
	line = line[i+1:]

	switch version {
	case V1:
	if string(ftype) != "cgroup" {
	continue
	}
	// (10) mount source: filesystem specific information or "none"
	i = bytealg.IndexByte(line, ' ')
	if i < 0 {
	return 0, errMalformedFile
	}
	// Don't care about mount source.
	line = line[i+1:]

	// (11) super options: per super block options
	if !containsCPU(line) {
	continue
	}
	case V2:
	if string(ftype) != "cgroup2" {
	continue
	}
	default:
	throw("impossible cgroup version")
	panic("unreachable")
	}

	// Check cgroup is in the root.
	// If the cgroup is /sandbox/container, the matching mount point root could be
	// /sandbox/container, /sandbox, or /
	rootLen, err := unescapePath(root, root)
	if err != nil {
	return 0, err
	}
	root = root[:rootLen]
	if !hasPathPrefix(cgroup, root) {
	continue // not matched, this is not the mount point we're looking for
	}

	// Cutoff the root from cgroup, ensure rel starts with '/' or is empty.
	rel := cgroup[rootLen:]
	if rootLen == 1 && len(cgroup) > 1 {
	// root is "/", but cgroup is not. Keep full cgroup path.
	rel = cgroup
	}
	if hasPathPrefix(rel, []byte("/..")) {
	// the cgroup is out of current cgroup namespace, and this mount point
	// cannot reach that cgroup.
	//
	// e.g. If the process is in cgroup /init, but in a cgroup namespace
	// rooted at /sandbox/container, /proc/self/cgroup will show /../../init.
	// we can reach it if the mount point root is
	// /../.. or /../../init, but not if it is /.. or /
	// While mount point with root /../../.. should able to reach the cgroup,
	// we don't know the path to the cgroup within that mount point.
	continue
	}

	// All conditions met, compose the full path.
	// Copy rel to the correct place first, it may overlap with out.
	n := unescapedLen(mnt)
	if n+len(rel) > len(out) {
	return 0, errPathTooLong
	}
	copy(out[n:], rel)
	n2, err := unescapePath(out[:n], mnt)
	if err != nil {
	return 0, err
	}
	if n2 != n {
	throw("wrong unescaped len")
	}
	return n + len(rel), nil
	}

	// Found nothing.
	return 0, ErrNoCgroup
	}

	// hasPathPrefix reports whether path p is prefix itself or lies below
	// prefix, i.e., whether p begins with prefix and the match ends at a
	// path boundary (the end of p or a '/').
	//
	// A prefix of length 1 is assumed to be the root "/" and matches every
	// p; callers must ensure that prefix begins with '/'.
	func hasPathPrefix(p, prefix []byte) bool {
		n := len(prefix)
		if n == 1 {
			return true // root contains everything
		}
		// Neither cmd/compile nor gccgo allocates for these string conversions.
		if len(p) < n || string(p[:n]) != string(prefix) {
			return false
		}
		return len(p) == n || p[n] == '/' // must match at path boundary
	}

	var (
	// errInvalidEscape is returned by unescapePath when the input contains
	// a malformed escape sequence.
	errInvalidEscape error = stringError("invalid path escape sequence")
	// errPathTooLong is returned when a path does not fit in the
	// caller-provided output buffer.
	errPathTooLong error = stringError("path too long")
	)

	// unescapedLen returns the length of in once unescaped by unescapePath,
	// assuming in is well-formed: every backslash starts a 4-byte escape
	// sequence that decodes to a single byte, saving 3 bytes each.
	func unescapedLen(in []byte) int {
		escapes := bytealg.Count(in, '\\')
		return len(in) - 3*escapes
	}

	// unescapePath copies in to out, unescaping escape sequences generated by
	// Linux's show_path.
	//
	// That is, '\', ' ', '\t', and '\n' are converted to octal escape sequences,
	// like '\040' for space.
	//
	// Caller must ensure that out at least has unescapedLen(in) bytes.
	// in and out may alias; in-place unescaping is supported.
	//
	// Returns the number of bytes written to out.
	//
	// Also see escapePath in cgroup_linux_test.go.
	func unescapePath(out []byte, in []byte) (int, error) {
	var outi, ini int
	for ini < len(in) {
	if outi >= len(out) {
	// given that caller already ensured out is long enough, this
	// is only possible if there are malformed escape sequences
	// we have not parsed yet.
	return outi, errInvalidEscape
	}
	c := in[ini]
	if c != '\\' {
	out[outi] = c
	outi++
	ini++
	continue
	}

	// Start of escape sequence.

	// Escape sequence is always 4 characters: one slash and three
	// digits.
	if ini+3 >= len(in) {
	return outi, errInvalidEscape
	}

	var outc int
	for i := range 3 {
	c := in[ini+1+i]
	if c < '0' \|\| c > '7' {
	return outi, errInvalidEscape
	}

	outc *= 8
	outc += int(c - '0')
	}

	if outc > 0xFF {
	return outi, errInvalidEscape
	}
	out[outi] = byte(outc)
	outi++

	ini += 4
	}

	return outi, nil
	}