File size: 374 Bytes
78c640c
1b90c64
78c640c
1b90c64
 
 
 
 
 
 
 
78c640c
1
2
3
4
5
6
7
8
9
10
11
12
from typing import List

def calculate_compression(unicode_bytes: List[int], tokens: List[int]) -> float:
    """Return the length of ``unicode_bytes`` divided by the length of ``tokens``.

    Only the lengths of the two sequences are used; their elements are
    never inspected.

    Args:
        unicode_bytes: Sequence whose length is the numerator (the "raw
            text size" referred to by the error message).
        tokens: Sequence whose length is the denominator.

    Returns:
        ``len(unicode_bytes) / len(tokens)``, or ``float('inf')`` when
        ``tokens`` is empty.

    Raises:
        ValueError: If ``unicode_bytes`` is empty. This is checked before
            the empty-``tokens`` case, so two empty inputs raise rather
            than return infinity.
    """
    # Measure both inputs up front so a bad argument fails on len() before
    # any of the value checks below run.
    n_raw, n_tokens = len(unicode_bytes), len(tokens)

    if not n_raw:
        raise ValueError("Raw text size cannot be zero.")

    # No tokens for a non-empty input: report an unbounded ratio rather
    # than dividing by zero.
    if n_tokens <= 0:
        return float('inf')

    return n_raw / n_tokens