Spaces:

AxL95
/

medically

Runtime error

medically / utils.py

Ajout fichiers pour mieux structurer le code

92343a7 verified 8 months ago

860 Bytes

	import re

	# Punctuation characters that are counted together with whitespace.
	# The pipe and the backslash are both members; the backslash is written as an
	# explicit escape so the literal contains no invalid escape sequence.
	_PUNCTUATION = frozenset(',.;:!?()[]{}"\'`-_=+<>/@#$%^&*|\\')

	# Fenced code blocks (non-greedy, spanning newlines) and http(s) URLs.
	_CODE_BLOCK_RE = re.compile(r'```[\s\S]*?```')
	_URL_RE = re.compile(r'https?://\S+')


	def simulate_token_count(text):
	    """Return a rough, heuristic estimate of the number of tokens in *text*.

	    This does not run a real tokenizer. The estimate is built from character
	    classes: roughly one token per four "ordinary" characters, with extra
	    weight for digits, for words of at most two characters, for fenced code
	    blocks and for URLs. The weighted sum is inflated by 10% and one is
	    added, so any non-empty input yields at least 1.

	    Args:
	        text: The string to measure. Any falsy value (empty string, None)
	            yields 0.

	    Returns:
	        The estimated token count as an int.
	    """
	    if not text:
	        return 0

	    # Pad every newline with spaces so it is surrounded by whitespace; this
	    # adds two characters per newline to every count below.
	    text = text.replace('\n', ' \n ')

	    spaces_and_punct = sum(
	        1 for char in text if char.isspace() or char in _PUNCTUATION
	    )
	    digits = sum(1 for char in text if char.isdigit())
	    short_words = sum(1 for word in text.split() if len(word) <= 2)

	    code_blocks = len(_CODE_BLOCK_RE.findall(text))
	    urls = len(_URL_RE.findall(text))

	    # Characters left over once the specially weighted categories are removed.
	    # Note that short_words is a word count, not a character count, so this
	    # value can be negative for short, digit-heavy input; the weighted total
	    # below is still positive.
	    adjusted_length = len(text) - spaces_and_punct - digits - short_words

	    # NOTE: whitespace/punctuation is weighted 0.25, the same as the base
	    # rate of 1/4, so spaces_and_punct cancels out of the total. It is kept
	    # as a separate term so the weight can be tuned independently.
	    token_count = (
	        adjusted_length / 4
	        + spaces_and_punct * 0.25
	        + digits * 0.5
	        + short_words * 0.5
	        + code_blocks * 5
	        + urls * 4
	    )

	    # 10% safety margin, truncated toward zero, plus one.
	    return int(token_count * 1.1) + 1