Spaces:
Running
Running
File size: 593 Bytes
7566ac3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 |
import os
from graphgen.bases import BaseEvaluator, QAPair
from graphgen.models.tokenizer import Tokenizer
class LengthEvaluator(BaseEvaluator):
    """Evaluator that scores a QA pair by the length of its tokenized text."""

    def __init__(self, tokenizer_name: str = None):
        """
        Build the tokenizer used for measuring pairs.

        :param tokenizer_name: name handed to ``Tokenizer``. When it is falsy
            (``None`` or empty), the ``TOKENIZER_MODEL`` environment variable
            is used instead, defaulting to ``"cl100k_base"`` if that is unset.
        """
        if tokenizer_name:
            chosen_model = tokenizer_name
        else:
            # No explicit name given: defer to the environment, then the default.
            chosen_model = os.environ.get("TOKENIZER_MODEL", "cl100k_base")
        self.tokenizer: Tokenizer = Tokenizer(chosen_model)

    def evaluate(self, pair: QAPair) -> float:
        """
        Measure a QA pair.

        The question and answer are joined directly (no separator) and encoded
        with the configured tokenizer.

        :param pair: the QA pair whose ``question`` and ``answer`` are measured.
        :return: the length of the encoded result.
        """
        return len(self.tokenizer.encode(pair.question + pair.answer))
|