from utils.preprocess import tokenize


def process_sample(sample):
    """Tokenize a QA sample and locate the answer span in the combined sequence.

    Returns None when the answer tokens cannot be found verbatim in the context.
    """
    context_tokens = tokenize(sample["context"])
    question_tokens = tokenize(sample["question"])
    answer_tokens = tokenize(sample["answer_text"])

    # Combine question + context, separated by a [SEP] token.
    tokens = question_tokens + ["[SEP]"] + context_tokens

    # Find the first occurrence of the answer tokens in the context, then
    # shift the index past the question and the [SEP] token so that start/end
    # point into the combined sequence.
    offset = len(question_tokens) + 1
    start = -1
    for i in range(len(context_tokens) - len(answer_tokens) + 1):
        if context_tokens[i:i + len(answer_tokens)] == answer_tokens:
            start = i + offset
            break
    if start == -1:
        return None  # answer does not appear verbatim in the context

    end = start + len(answer_tokens) - 1  # inclusive end index
    return {
        "tokens": tokens,
        "start": start,
        "end": end,
    }
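A minimal usage sketch, assuming tokenize splits on whitespace and the sample dict uses SQuAD-style field names (the example data and schema here are illustrative assumptions, not from the source):

# Hypothetical SQuAD-style sample (field names as consumed by process_sample).
sample = {
    "context": "The Eiffel Tower was completed in 1889 .",
    "question": "When was the Eiffel Tower completed ?",
    "answer_text": "1889",
}

features = process_sample(sample)
if features is not None:
    s, e = features["start"], features["end"]
    # start/end index into the combined question + [SEP] + context sequence,
    # so slicing with the inclusive end recovers the answer tokens.
    print(features["tokens"][s:e + 1])  # ["1889"] with a whitespace tokenizer

Note that because end is inclusive, the slice upper bound must be e + 1; a model head trained on these labels needs to follow the same convention.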