muhsin's picture
remove space saving
78c640c
raw
history blame contribute delete
374 Bytes
from typing import List
def calculate_compression(unicode_bytes: List[int], tokens: List[int]) -> float:
    """Return the ratio of raw input length to token count.

    Only the lengths of the two arguments are used; their contents are
    never inspected.

    Args:
        unicode_bytes: The raw input sequence; its length is the numerator.
        tokens: The token sequence; its length is the denominator.

    Returns:
        ``len(unicode_bytes) / len(tokens)``, or ``float('inf')`` when
        ``tokens`` is empty.

    Raises:
        ValueError: If ``unicode_bytes`` is empty.
    """
    raw_size = len(unicode_bytes)
    if raw_size == 0:
        raise ValueError("Raw text size cannot be zero.")

    token_size = len(tokens)
    if token_size == 0:
        # Non-empty input with no tokens: return infinity instead of
        # dividing by zero.
        return float('inf')

    return raw_size / token_size