Upload folder using huggingface_hub

e36aeda verified about 1 month ago

4.96 kB

	// Copyright 2011 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package csv

	import (
	"bufio"
	"io"
	"strings"
	"unicode"
	"unicode/utf8"
	)

	// Writer encodes records as CSV and writes them to an underlying stream.
	//
	// A Writer obtained from [NewWriter] separates fields with ',' and ends
	// each record with a newline. Adjust the exported fields, if needed,
	// before the first call to [Writer.Write] or [Writer.WriteAll].
	//
	// [Writer.Comma] selects the field delimiter.
	//
	// Setting [Writer.UseCRLF] to true makes the Writer end each output
	// line with \r\n rather than \n.
	//
	// Records are buffered as they are written. Once all data has been
	// written, call [Writer.Flush] to guarantee that everything has been
	// forwarded to the underlying [io.Writer], then call [Writer.Error]
	// to check for any error that occurred along the way.
	type Writer struct {
		// Comma is the field delimiter (set to ',' by NewWriter).
		Comma rune
		// UseCRLF selects \r\n as the line terminator when true.
		UseCRLF bool

		// w buffers the encoded output.
		w *bufio.Writer
	}

	// NewWriter constructs a Writer whose output is buffered and sent to w.
	// The returned Writer uses ',' as its field delimiter and leaves
	// UseCRLF at its zero value (false).
	func NewWriter(w io.Writer) *Writer {
		cw := new(Writer)
		cw.Comma = ','
		cw.w = bufio.NewWriter(w)
		return cw
	}

	// Write encodes one CSV record and appends it to the Writer's buffer,
	// quoting fields where required. Each element of record is one field.
	// Because output is buffered, [Writer.Flush] must eventually be called
	// for the record to reach the underlying [io.Writer].
	//
	// It returns errInvalidDelim when Comma is not a valid delimiter, or the
	// first error reported by the buffered writer.
	func (w *Writer) Write(record []string) error {
		if !validDelim(w.Comma) {
			return errInvalidDelim
		}

		out := w.w
		for idx, field := range record {
			// Every field after the first is preceded by the delimiter.
			if idx > 0 {
				if _, err := out.WriteRune(w.Comma); err != nil {
					return err
				}
			}

			// Fast path: the field can be emitted exactly as it is.
			if !w.fieldNeedsQuotes(field) {
				if _, err := out.WriteString(field); err != nil {
					return err
				}
				continue
			}

			// Quoted path: opening quote, escaped contents, closing quote.
			if err := out.WriteByte('"'); err != nil {
				return err
			}
			rest := field
			for rest != "" {
				// Locate the next byte that needs special treatment.
				pos := strings.IndexAny(rest, "\"\r\n")
				if pos < 0 {
					// Nothing special remains; emit the tail verbatim.
					if _, err := out.WriteString(rest); err != nil {
						return err
					}
					break
				}

				// Emit everything that precedes the special byte unchanged.
				if _, err := out.WriteString(rest[:pos]); err != nil {
					return err
				}
				special := rest[pos]
				rest = rest[pos+1:]

				var err error
				switch {
				case special == '"':
					// A quote inside a quoted field is doubled.
					_, err = out.WriteString(`""`)
				case special == '\n' && w.UseCRLF:
					_, err = out.WriteString("\r\n")
				case special == '\r' && w.UseCRLF:
					// In CRLF mode a carriage return in the field is not
					// written; only '\n' produces a "\r\n" pair.
				default:
					// '\r' or '\n' without CRLF mode is passed through.
					err = out.WriteByte(special)
				}
				if err != nil {
					return err
				}
			}
			if err := out.WriteByte('"'); err != nil {
				return err
			}
		}

		// Terminate the record.
		if w.UseCRLF {
			_, err := out.WriteString("\r\n")
			return err
		}
		return out.WriteByte('\n')
	}

	// Flush pushes any buffered data through to the underlying [io.Writer].
	// Flush itself returns nothing; call [Writer.Error] afterwards to find
	// out whether an error occurred during the flush.
	func (w *Writer) Flush() {
		// The result is intentionally dropped here: callers retrieve it
		// through Error.
		_ = w.w.Flush()
	}

	// Error returns any error that occurred during an earlier
	// [Writer.Write] or [Writer.Flush], or nil if there was none.
	func (w *Writer) Error() error {
		// Writing nil adds no data; it only asks the buffered writer for
		// the error it is currently holding, if any.
		if _, err := w.w.Write(nil); err != nil {
			return err
		}
		return nil
	}

	// WriteAll encodes every record in records via [Writer.Write] and then
	// flushes the buffer, returning any error from the flush. If a record
	// fails to write, WriteAll stops at once and returns that error without
	// flushing.
	func (w *Writer) WriteAll(records [][]string) error {
		for i := range records {
			if err := w.Write(records[i]); err != nil {
				return err
			}
		}
		return w.w.Flush()
	}

	// fieldNeedsQuotes reports whether our field must be enclosed in quotes.
	// Fields with a Comma, fields with a quote or newline, and
	// fields which start with a space must be enclosed in quotes.
	// We used to quote empty strings, but we do not anymore (as of Go 1.4).
	// The two representations should be equivalent, but Postgres distinguishes
	// quoted vs non-quoted empty string during database imports, and it has
	// an option to force the quoted behavior for non-quoted CSV but it has
	// no option to force the non-quoted behavior for quoted CSV, making
	// CSV with quoted empty strings strictly less useful.
	// Not quoting the empty string also makes this package match the behavior
	// of Microsoft Excel and Google Drive.
	// For Postgres, quote the data terminating string `\.`.
	func (w *Writer) fieldNeedsQuotes(field string) bool {
	if field == "" {
	return false
	}

	if field == `\.` {
	return true
	}

	if w.Comma < utf8.RuneSelf {
	for i := 0; i < len(field); i++ {
	c := field[i]
	if c == '\n' \|\| c == '\r' \|\| c == '"' \|\| c == byte(w.Comma) {
	return true
	}
	}
	} else {
	if strings.ContainsRune(field, w.Comma) \|\| strings.ContainsAny(field, "\"\r\n") {
	return true
	}
	}

	r1, _ := utf8.DecodeRuneInString(field)
	return unicode.IsSpace(r1)
	}