Upload folder using huggingface_hub

59b7eeb verified 6 months ago

538 Bytes

	# Copyright (c) Facebook, Inc. and its affiliates.
	#
	# This source code is licensed under the MIT license found in the
	# LICENSE file in the root directory of this source tree.

	import re


	# Matches any run of one or more whitespace characters; tokenize_line
	# replaces each such run with a single space before splitting.
	SPACE_NORMALIZER = re.compile(r"\s+")


	def tokenize_line(line):
	    """Break a line of text into whitespace-delimited tokens.

	    Args:
	        line: The text to tokenize. Any value that is not a ``str`` is
	            handed back to the caller unchanged.

	    Returns:
	        A list of the non-empty tokens found in ``line`` when it is a
	        string; otherwise ``line`` itself.
	    """
	    if isinstance(line, str):
	        # Squash every whitespace run down to one space, then trim the
	        # ends and split into tokens.
	        collapsed = SPACE_NORMALIZER.sub(" ", line)
	        return collapsed.strip().split()
	    return line

	def char_tokenizer(line):
	    """Split a line into single characters, marking word boundaries.

	    Leading and trailing whitespace is stripped. Each remaining space
	    character is replaced by a two-character marker (a backslash followed
	    by a pipe), and the same marker is appended to the end. Only the plain
	    space character is replaced; other interior whitespace is left as-is.
	    The marked string is then split into one list element per character,
	    so every marker contributes two elements to the result.

	    Args:
	        line: The string to tokenize.

	    Returns:
	        A list of one-character strings.
	    """
	    # Spelled with an explicit escaped backslash: the bare '\|' form is an
	    # invalid escape sequence that newer Python versions warn about. The
	    # runtime value (backslash + pipe) is unchanged.
	    boundary = "\\|"
	    marked = line.strip().replace(" ", boundary) + boundary
	    return list(marked)